From 01d8e09fdba0c9d3fe316a86e5ce4481e6ed71d1 Mon Sep 17 00:00:00 2001 From: Senran Zhang Date: Tue, 26 Nov 2019 10:15:14 +0800 Subject: [PATCH 001/591] [clang][CodeGen] Fix wrong memcpy size of no_unique_address in FieldMemcpyizer When generating ctor, FieldMemcpyizer wrongly treated zero-sized class members as what should be copied, and generated wrong memcpy size under some special circumstances. This patch tries to fix it. Reviewed By: MaskRay, rjmccall Differential Revision: https://reviews.llvm.org/D70671 --- clang/lib/CodeGen/CGClass.cpp | 2 ++ clang/test/CodeGenCXX/no-unique-address-2.cpp | 25 +++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 clang/test/CodeGenCXX/no-unique-address-2.cpp diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index 04ef912b18bd4..d07b1c665cc43 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -914,6 +914,8 @@ namespace { } void addMemcpyableField(FieldDecl *F) { + if (F->isZeroSize(CGF.getContext())) + return; if (!FirstField) addInitialField(F); else diff --git a/clang/test/CodeGenCXX/no-unique-address-2.cpp b/clang/test/CodeGenCXX/no-unique-address-2.cpp new file mode 100644 index 0000000000000..aa0c67758a192 --- /dev/null +++ b/clang/test/CodeGenCXX/no-unique-address-2.cpp @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -std=c++2a %s -emit-llvm -o - -triple x86_64-linux-gnu | FileCheck %s + +struct TriviallyCopyable {}; + +struct NonTriviallyCopyable { + NonTriviallyCopyable() = default; + NonTriviallyCopyable(const NonTriviallyCopyable&) = default; + NonTriviallyCopyable(NonTriviallyCopyable &&) {} +}; + +struct Foo { + int i; + [[no_unique_address]] TriviallyCopyable m; + [[no_unique_address]] NonTriviallyCopyable n; +}; + +void call() { + Foo foo; + Foo foo2(static_cast(foo)); +} + +// The memcpy call should copy exact 4 bytes for member 'int i' +// CHECK: define {{.*}} void @_ZN3FooC2EOS_ +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.+}}, i8* 
{{.+}}, i64 4, i1 false) +// CHECK: call void @_ZN20NonTriviallyCopyableC2EOS_ From c43b8ec735e88472593ca420a5c6a17630f94066 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 25 Nov 2019 17:14:52 -0800 Subject: [PATCH 002/591] [X86] Add support for STRICT_FP_ROUND/STRICT_FP_EXTEND from/to fp128 to/from f32/f64/f80 in 64-bit mode. These need to emit a libcall like we do for the non-strict version. 32-bit mode needs to SoftenFloat support to be implemented for strict FP nodes. Differential Revision: https://reviews.llvm.org/D70504 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 54 +++++-- llvm/test/CodeGen/X86/fp128-cast-strict.ll | 175 +++++++++++++++++++++ 2 files changed, 212 insertions(+), 17 deletions(-) create mode 100644 llvm/test/CodeGen/X86/fp128-cast-strict.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c658363f8d6a1..33f50e518bbe0 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -690,7 +690,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::f128, LibCall); setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall); - setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); + setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom); // We need to custom handle any FP_ROUND with an f128 input, but // LegalizeDAG uses the result type to know when to run a custom handler. // So we have to list all legal floating point result types here. @@ -19714,9 +19715,11 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { } SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { + bool IsStrict = Op->isStrictFPOpcode(); + SDLoc DL(Op); MVT VT = Op.getSimpleValueType(); - SDValue In = Op.getOperand(0); + SDValue In = Op.getOperand(IsStrict ? 
1 : 0); MVT SVT = In.getSimpleValueType(); if (VT == MVT::f128) { @@ -19725,6 +19728,8 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { } assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!"); + // FIXME: Strict fp. + assert(!IsStrict && "Strict FP not supported yet!"); return DAG.getNode(X86ISD::VFPEXT, DL, VT, DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32, @@ -19732,8 +19737,10 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { } SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { + bool IsStrict = Op->isStrictFPOpcode(); + MVT VT = Op.getSimpleValueType(); - SDValue In = Op.getOperand(0); + SDValue In = Op.getOperand(IsStrict ? 1 : 0); MVT SVT = In.getSimpleValueType(); // It's legal except when f128 is involved @@ -19745,17 +19752,17 @@ SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { // FP_ROUND node has a second operand indicating whether it is known to be // precise. That doesn't take part in the LibCall so we can't directly use // LowerF128Call. + + SDLoc dl(Op); + SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue(); MakeLibCallOptions CallOptions; - return makeLibCall(DAG, LC, VT, In, CallOptions, SDLoc(Op)).first; -} + std::pair Tmp = makeLibCall(DAG, LC, VT, In, CallOptions, + dl, Chain); -// FIXME: This is a hack to allow FP_ROUND to be marked Custom without breaking -// the default expansion of STRICT_FP_ROUND. -static SDValue LowerSTRICT_FP_ROUND(SDValue Op, SelectionDAG &DAG) { - // FIXME: Need to form a libcall with an input chain for f128. 
- assert(Op.getOperand(0).getValueType() != MVT::f128 && - "Don't know how to handle f128 yet!"); - return Op; + if (IsStrict) + return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl); + + return Tmp.first; } /// Depending on uarch and/or optimizing for size, we might prefer to use a @@ -27773,9 +27780,21 @@ SDValue X86TargetLowering::LowerGC_TRANSITION_END(SDValue Op, SDValue X86TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG, RTLIB::Libcall Call) const { - SmallVector Ops(Op->op_begin(), Op->op_end()); + + bool IsStrict = Op->isStrictFPOpcode(); + unsigned Offset = IsStrict ? 1 : 0; + SmallVector Ops(Op->op_begin() + Offset, Op->op_end()); + + SDLoc dl(Op); + SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue(); MakeLibCallOptions CallOptions; - return makeLibCall(DAG, Call, MVT::f128, Ops, CallOptions, SDLoc(Op)).first; + std::pair Tmp = makeLibCall(DAG, Call, MVT::f128, Ops, + CallOptions, dl, Chain); + + if (IsStrict) + return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl); + + return Tmp.first; } /// Provide custom lowering hooks for some operations. 
@@ -27825,9 +27844,10 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::STRICT_FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); - case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); - case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); - case ISD::STRICT_FP_ROUND: return LowerSTRICT_FP_ROUND(Op, DAG); + case ISD::FP_EXTEND: + case ISD::STRICT_FP_EXTEND: return LowerFP_EXTEND(Op, DAG); + case ISD::FP_ROUND: + case ISD::STRICT_FP_ROUND: return LowerFP_ROUND(Op, DAG); case ISD::LOAD: return LowerLoad(Op, Subtarget, DAG); case ISD::STORE: return LowerStore(Op, Subtarget, DAG); case ISD::FADD: diff --git a/llvm/test/CodeGen/X86/fp128-cast-strict.ll b/llvm/test/CodeGen/X86/fp128-cast-strict.ll new file mode 100644 index 0000000000000..84964d7719251 --- /dev/null +++ b/llvm/test/CodeGen/X86/fp128-cast-strict.ll @@ -0,0 +1,175 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-android -mattr=+sse | FileCheck %s --check-prefixes=X64,X64-SSE +; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-gnu -mattr=+sse | FileCheck %s --check-prefixes=X64,X64-SSE +; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-android -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX +; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-gnu -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX +; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-android -mattr=+avx512f | FileCheck %s --check-prefixes=X64,X64-AVX +; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefixes=X64,X64-AVX + +; Check soft floating point conversion function calls. 
+ +@vf32 = common global float 0.000000e+00, align 4 +@vf64 = common global double 0.000000e+00, align 8 +@vf80 = common global x86_fp80 0xK00000000000000000000, align 8 +@vf128 = common global fp128 0xL00000000000000000000000000000000, align 16 + +define void @TestFPExtF32_F128() nounwind strictfp { +; X64-SSE-LABEL: TestFPExtF32_F128: +; X64-SSE: # %bb.0: # %entry +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-SSE-NEXT: callq __extendsftf2 +; X64-SSE-NEXT: movaps %xmm0, {{.*}}(%rip) +; X64-SSE-NEXT: popq %rax +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: TestFPExtF32_F128: +; X64-AVX: # %bb.0: # %entry +; X64-AVX-NEXT: pushq %rax +; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-AVX-NEXT: callq __extendsftf2 +; X64-AVX-NEXT: vmovaps %xmm0, {{.*}}(%rip) +; X64-AVX-NEXT: popq %rax +; X64-AVX-NEXT: retq +entry: + %0 = load float, float* @vf32, align 4 + %conv = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %0, metadata !"fpexcept.strict") #0 + store fp128 %conv, fp128* @vf128, align 16 + ret void +} + +define void @TestFPExtF64_F128() nounwind strictfp { +; X64-SSE-LABEL: TestFPExtF64_F128: +; X64-SSE: # %bb.0: # %entry +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X64-SSE-NEXT: callq __extenddftf2 +; X64-SSE-NEXT: movaps %xmm0, {{.*}}(%rip) +; X64-SSE-NEXT: popq %rax +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: TestFPExtF64_F128: +; X64-AVX: # %bb.0: # %entry +; X64-AVX-NEXT: pushq %rax +; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X64-AVX-NEXT: callq __extenddftf2 +; X64-AVX-NEXT: vmovaps %xmm0, {{.*}}(%rip) +; X64-AVX-NEXT: popq %rax +; X64-AVX-NEXT: retq +entry: + %0 = load double, double* @vf64, align 8 + %conv = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %0, metadata !"fpexcept.strict") #0 + store fp128 %conv, fp128* @vf128, align 16 + ret void +} + +define void @TestFPExtF80_F128() nounwind strictfp { +; 
X64-SSE-LABEL: TestFPExtF80_F128: +; X64-SSE: # %bb.0: # %entry +; X64-SSE-NEXT: subq $24, %rsp +; X64-SSE-NEXT: fldt {{.*}}(%rip) +; X64-SSE-NEXT: fstpt (%rsp) +; X64-SSE-NEXT: callq __extendxftf2 +; X64-SSE-NEXT: movaps %xmm0, {{.*}}(%rip) +; X64-SSE-NEXT: addq $24, %rsp +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: TestFPExtF80_F128: +; X64-AVX: # %bb.0: # %entry +; X64-AVX-NEXT: subq $24, %rsp +; X64-AVX-NEXT: fldt {{.*}}(%rip) +; X64-AVX-NEXT: fstpt (%rsp) +; X64-AVX-NEXT: callq __extendxftf2 +; X64-AVX-NEXT: vmovaps %xmm0, {{.*}}(%rip) +; X64-AVX-NEXT: addq $24, %rsp +; X64-AVX-NEXT: retq +entry: + %0 = load x86_fp80, x86_fp80* @vf80, align 8 + %conv = call fp128 @llvm.experimental.constrained.fpext.f128.f80(x86_fp80 %0, metadata !"fpexcept.strict") #0 + store fp128 %conv, fp128* @vf128, align 16 + ret void +} + +define void @TestFPTruncF128_F32() nounwind strictfp { +; X64-SSE-LABEL: TestFPTruncF128_F32: +; X64-SSE: # %bb.0: # %entry +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movaps {{.*}}(%rip), %xmm0 +; X64-SSE-NEXT: callq __trunctfsf2 +; X64-SSE-NEXT: movss %xmm0, {{.*}}(%rip) +; X64-SSE-NEXT: popq %rax +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: TestFPTruncF128_F32: +; X64-AVX: # %bb.0: # %entry +; X64-AVX-NEXT: pushq %rax +; X64-AVX-NEXT: vmovaps {{.*}}(%rip), %xmm0 +; X64-AVX-NEXT: callq __trunctfsf2 +; X64-AVX-NEXT: vmovss %xmm0, {{.*}}(%rip) +; X64-AVX-NEXT: popq %rax +; X64-AVX-NEXT: retq +entry: + %0 = load fp128, fp128* @vf128, align 16 + %conv = call float @llvm.experimental.constrained.fptrunc.f32.f128(fp128 %0, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + store float %conv, float* @vf32, align 4 + ret void +} + +define void @TestFPTruncF128_F64() nounwind strictfp { +; X64-SSE-LABEL: TestFPTruncF128_F64: +; X64-SSE: # %bb.0: # %entry +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movaps {{.*}}(%rip), %xmm0 +; X64-SSE-NEXT: callq __trunctfdf2 +; X64-SSE-NEXT: movsd %xmm0, {{.*}}(%rip) +; X64-SSE-NEXT: popq %rax +; X64-SSE-NEXT: retq 
+; +; X64-AVX-LABEL: TestFPTruncF128_F64: +; X64-AVX: # %bb.0: # %entry +; X64-AVX-NEXT: pushq %rax +; X64-AVX-NEXT: vmovaps {{.*}}(%rip), %xmm0 +; X64-AVX-NEXT: callq __trunctfdf2 +; X64-AVX-NEXT: vmovsd %xmm0, {{.*}}(%rip) +; X64-AVX-NEXT: popq %rax +; X64-AVX-NEXT: retq +entry: + %0 = load fp128, fp128* @vf128, align 16 + %conv = call double @llvm.experimental.constrained.fptrunc.f64.f128(fp128 %0, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + store double %conv, double* @vf64, align 8 + ret void +} + +define void @TestFPTruncF128_F80() nounwind strictfp { +; X64-SSE-LABEL: TestFPTruncF128_F80: +; X64-SSE: # %bb.0: # %entry +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movaps {{.*}}(%rip), %xmm0 +; X64-SSE-NEXT: callq __trunctfxf2 +; X64-SSE-NEXT: fstpt {{.*}}(%rip) +; X64-SSE-NEXT: popq %rax +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: TestFPTruncF128_F80: +; X64-AVX: # %bb.0: # %entry +; X64-AVX-NEXT: pushq %rax +; X64-AVX-NEXT: vmovaps {{.*}}(%rip), %xmm0 +; X64-AVX-NEXT: callq __trunctfxf2 +; X64-AVX-NEXT: fstpt {{.*}}(%rip) +; X64-AVX-NEXT: popq %rax +; X64-AVX-NEXT: retq +entry: + %0 = load fp128, fp128* @vf128, align 16 + %conv = call x86_fp80 @llvm.experimental.constrained.fptrunc.f80.f128(fp128 %0, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + store x86_fp80 %conv, x86_fp80* @vf80, align 8 + ret void +} + +attributes #0 = { strictfp } + +declare float @llvm.experimental.constrained.fptrunc.f32.f128(fp128, metadata, metadata) +declare double @llvm.experimental.constrained.fptrunc.f64.f128(fp128, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.fptrunc.f80.f128(fp128, metadata, metadata) +declare fp128 @llvm.experimental.constrained.fpext.f128.f32(float, metadata) +declare fp128 @llvm.experimental.constrained.fpext.f128.f64(double, metadata) +declare fp128 @llvm.experimental.constrained.fpext.f128.f80(x86_fp80, metadata) From 3dc7c5f7d83c61fdbce5ad2ab5aad716dd6d1d1e Mon Sep 17 00:00:00 2001 From: Craig Topper 
Date: Mon, 25 Nov 2019 18:13:29 -0800 Subject: [PATCH 003/591] [LegalizeTypes] Remove code to create ISD::FP_TO_FP16 from SoftenFloatRes_FTRUNC. There seems to have been a misunderstanding of what ISD::FTRUNC represents. ISD::FTRUNC is equivalent to llvm.trunc which takes a floating point value, truncates it without changing the size of the value and returns it. Despite its similar name, its different than the fptrunc instruction in IR which changes a floating point value to a smaller floating point value. fptrunc is represented by ISD::FP_ROUND in SelectionDAG. Since the ISD::FP_TO_FP16 node takes a floating point value and converts it to f16 its more similar to ISD::FP_ROUND. In fact there is identical code to what is being removed here in SoftenFloatRes_FP_ROUND. I assume this bug was never encountered because it would require f16 to be legalized by softening rather than the default of promoting. --- llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index af963bc028026..c4a74d5c1c714 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -705,9 +705,6 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - if (N->getValueType(0) == MVT::f16) - return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); TargetLowering::MakeLibCallOptions CallOptions; EVT OpsVT[1] = { N->getOperand(0).getValueType() }; From c9ddb02659e3ece7a0d9d6b4dac7ceea4ae46e6d Mon Sep 17 00:00:00 2001 From: Muhammad Omair Javaid Date: Tue, 26 Nov 2019 09:32:13 +0500 Subject: [PATCH 004/591] Revert "As a follow-up to my initial mail to llvm-dev here's a first 
pass at the O1 described there." This reverts commit 8ff85ed905a7306977d07a5cd67ab4d5a56fafb4. This commit introduced 9 new failures on lldb buildbot host at http://lab.llvm.org:8014/builders/lldb-aarch64-ubuntu Following tests were failing: lldb-api :: functionalities/tail_call_frames/ambiguous_tail_call_seq1/TestAmbiguousTailCallSeq1.py lldb-api :: functionalities/tail_call_frames/ambiguous_tail_call_seq2/TestAmbiguousTailCallSeq2.py lldb-api :: functionalities/tail_call_frames/disambiguate_call_site/TestDisambiguateCallSite.py lldb-api :: functionalities/tail_call_frames/disambiguate_paths_to_common_sink/TestDisambiguatePathsToCommonSink.py lldb-api :: functionalities/tail_call_frames/disambiguate_tail_call_seq/TestDisambiguateTailCallSeq.py lldb-api :: functionalities/tail_call_frames/inlining_and_tail_calls/TestInliningAndTailCalls.py lldb-api :: functionalities/tail_call_frames/sbapi_support/TestTailCallFrameSBAPI.py lldb-api :: functionalities/tail_call_frames/thread_step_out_message/TestArtificialFrameStepOutMessage.py lldb-api :: functionalities/tail_call_frames/thread_step_out_or_return/TestSteppingOutWithArtificialFrames.py lldb-api :: functionalities/tail_call_frames/unambiguous_sequence/TestUnambiguousTailCalls.py Tags: #llvm Differential Revision: https://reviews.llvm.org/D65410 --- .../2008-07-30-implicit-initialization.c | 2 +- clang/test/CodeGen/arm-fp16-arguments.c | 6 +- clang/test/CodeGen/arm-vfp16-arguments2.cpp | 6 +- clang/test/CodeGen/atomic-ops-libcall.c | 34 +-- clang/test/CodeGenCXX/atomicinit.cpp | 2 +- clang/test/CodeGenCXX/auto-var-init.cpp | 9 +- clang/test/CodeGenCXX/discard-name-values.cpp | 4 +- .../CodeGenCXX/microsoft-abi-dynamic-cast.cpp | 18 +- .../test/CodeGenCXX/microsoft-abi-typeid.cpp | 8 +- clang/test/CodeGenCXX/nrvo.cpp | 18 +- clang/test/CodeGenCXX/stack-reuse.cpp | 2 +- clang/test/CodeGenCXX/wasm-args-returns.cpp | 12 +- clang/test/CodeGenObjCXX/arc-blocks.mm | 6 +- clang/test/CodeGenObjCXX/nrvo.mm | 4 +- 
...e_to_dependency_directives_invalid_error.c | 32 +-- .../test/PCH/no-escaping-block-tail-calls.cpp | 4 +- llvm/include/llvm/Passes/PassBuilder.h | 10 +- llvm/lib/Passes/PassBuilder.cpp | 48 ++-- .../lib/Transforms/IPO/PassManagerBuilder.cpp | 46 ++- llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll | 268 +++++++++--------- llvm/test/Feature/optnone-opt.ll | 6 + llvm/test/Other/new-pm-defaults.ll | 78 ++--- llvm/test/Other/new-pm-thinlto-defaults.ll | 46 +-- llvm/test/Transforms/MemCpyOpt/lifetime.ll | 2 +- .../PhaseOrdering/simplifycfg-options.ll | 8 +- .../PhaseOrdering/two-shifts-by-sext.ll | 4 +- 26 files changed, 340 insertions(+), 343 deletions(-) diff --git a/clang/test/CodeGen/2008-07-30-implicit-initialization.c b/clang/test/CodeGen/2008-07-30-implicit-initialization.c index f2621f4560ec9..e77c70a140f9d 100644 --- a/clang/test/CodeGen/2008-07-30-implicit-initialization.c +++ b/clang/test/CodeGen/2008-07-30-implicit-initialization.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple i386-unknown-unknown -O2 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple i386-unknown-unknown -O1 -emit-llvm -o - %s | FileCheck %s // CHECK-LABEL: define i32 @f0() // CHECK: ret i32 0 // CHECK-LABEL: define i32 @f1() diff --git a/clang/test/CodeGen/arm-fp16-arguments.c b/clang/test/CodeGen/arm-fp16-arguments.c index 34dc1a1cbf6aa..d739f4b9c66a5 100644 --- a/clang/test/CodeGen/arm-fp16-arguments.c +++ b/clang/test/CodeGen/arm-fp16-arguments.c @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fallow-half-arguments-and-returns -emit-llvm -o - -O2 %s | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT -// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi hard -fallow-half-arguments-and-returns -emit-llvm -o - -O2 %s | FileCheck %s --check-prefix=CHECK --check-prefix=HARD -// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fnative-half-arguments-and-returns -emit-llvm -o - -O2 
%s | FileCheck %s --check-prefix=NATIVE +// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fallow-half-arguments-and-returns -emit-llvm -o - -O1 %s | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT +// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi hard -fallow-half-arguments-and-returns -emit-llvm -o - -O1 %s | FileCheck %s --check-prefix=CHECK --check-prefix=HARD +// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fnative-half-arguments-and-returns -emit-llvm -o - -O1 %s | FileCheck %s --check-prefix=NATIVE __fp16 g; diff --git a/clang/test/CodeGen/arm-vfp16-arguments2.cpp b/clang/test/CodeGen/arm-vfp16-arguments2.cpp index e436a5ecd6abd..4f75971d83277 100644 --- a/clang/test/CodeGen/arm-vfp16-arguments2.cpp +++ b/clang/test/CodeGen/arm-vfp16-arguments2.cpp @@ -1,12 +1,12 @@ // RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs \ -// RUN: -mfloat-abi soft -target-feature +neon -emit-llvm -o - -O2 %s \ +// RUN: -mfloat-abi soft -target-feature +neon -emit-llvm -o - -O1 %s \ // RUN: | FileCheck %s --check-prefix=CHECK-SOFT // RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs \ -// RUN: -mfloat-abi hard -target-feature +neon -emit-llvm -o - -O2 %s \ +// RUN: -mfloat-abi hard -target-feature +neon -emit-llvm -o - -O1 %s \ // RUN: | FileCheck %s --check-prefix=CHECK-HARD // RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs \ // RUN: -mfloat-abi hard -target-feature +neon -target-feature +fullfp16 \ -// RUN: -emit-llvm -o - -O2 %s \ +// RUN: -emit-llvm -o - -O1 %s \ // RUN: | FileCheck %s --check-prefix=CHECK-FULL typedef float float32_t; diff --git a/clang/test/CodeGen/atomic-ops-libcall.c b/clang/test/CodeGen/atomic-ops-libcall.c index ca79688c8a0c4..c673b07f8ed83 100644 --- a/clang/test/CodeGen/atomic-ops-libcall.c +++ b/clang/test/CodeGen/atomic-ops-libcall.c @@ -10,109 +10,109 @@ enum memory_order { int 
*test_c11_atomic_fetch_add_int_ptr(_Atomic(int *) *p) { // CHECK: test_c11_atomic_fetch_add_int_ptr - // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 12, i32 5) + // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 12, i32 5) return __c11_atomic_fetch_add(p, 3, memory_order_seq_cst); } int *test_c11_atomic_fetch_sub_int_ptr(_Atomic(int *) *p) { // CHECK: test_c11_atomic_fetch_sub_int_ptr - // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 20, i32 5) + // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 20, i32 5) return __c11_atomic_fetch_sub(p, 5, memory_order_seq_cst); } int test_c11_atomic_fetch_add_int(_Atomic(int) *p) { // CHECK: test_c11_atomic_fetch_add_int - // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 3, i32 5) + // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 3, i32 5) return __c11_atomic_fetch_add(p, 3, memory_order_seq_cst); } int test_c11_atomic_fetch_sub_int(_Atomic(int) *p) { // CHECK: test_c11_atomic_fetch_sub_int - // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 5, i32 5) + // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 5, i32 5) return __c11_atomic_fetch_sub(p, 5, memory_order_seq_cst); } int *fp2a(int **p) { // CHECK: @fp2a - // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 4, i32 0) + // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 4, i32 0) // Note, the GNU builtins do not multiply by sizeof(T)! 
return __atomic_fetch_sub(p, 4, memory_order_relaxed); } int test_atomic_fetch_add(int *p) { // CHECK: test_atomic_fetch_add - // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 55, i32 5) return __atomic_fetch_add(p, 55, memory_order_seq_cst); } int test_atomic_fetch_sub(int *p) { // CHECK: test_atomic_fetch_sub - // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 55, i32 5) return __atomic_fetch_sub(p, 55, memory_order_seq_cst); } int test_atomic_fetch_and(int *p) { // CHECK: test_atomic_fetch_and - // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_and_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_and_4(i8* {{%[0-9]+}}, i32 55, i32 5) return __atomic_fetch_and(p, 55, memory_order_seq_cst); } int test_atomic_fetch_or(int *p) { // CHECK: test_atomic_fetch_or - // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_or_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_or_4(i8* {{%[0-9]+}}, i32 55, i32 5) return __atomic_fetch_or(p, 55, memory_order_seq_cst); } int test_atomic_fetch_xor(int *p) { // CHECK: test_atomic_fetch_xor - // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_xor_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_xor_4(i8* {{%[0-9]+}}, i32 55, i32 5) return __atomic_fetch_xor(p, 55, memory_order_seq_cst); } int test_atomic_fetch_nand(int *p) { // CHECK: test_atomic_fetch_nand - // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_nand_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_nand_4(i8* {{%[0-9]+}}, i32 55, i32 5) return __atomic_fetch_nand(p, 55, memory_order_seq_cst); } int test_atomic_add_fetch(int *p) { // CHECK: test_atomic_add_fetch - // CHECK: [[CALL:%[^ ]*]] = 
call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 55, i32 5) // CHECK: {{%[^ ]*}} = add i32 [[CALL]], 55 return __atomic_add_fetch(p, 55, memory_order_seq_cst); } int test_atomic_sub_fetch(int *p) { // CHECK: test_atomic_sub_fetch - // CHECK: [[CALL:%[^ ]*]] = call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 55, i32 5) // CHECK: {{%[^ ]*}} = add i32 [[CALL]], -55 return __atomic_sub_fetch(p, 55, memory_order_seq_cst); } int test_atomic_and_fetch(int *p) { // CHECK: test_atomic_and_fetch - // CHECK: [[CALL:%[^ ]*]] = call i32 @__atomic_fetch_and_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_and_4(i8* {{%[0-9]+}}, i32 55, i32 5) // CHECK: {{%[^ ]*}} = and i32 [[CALL]], 55 return __atomic_and_fetch(p, 55, memory_order_seq_cst); } int test_atomic_or_fetch(int *p) { // CHECK: test_atomic_or_fetch - // CHECK: [[CALL:%[^ ]*]] = call i32 @__atomic_fetch_or_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_or_4(i8* {{%[0-9]+}}, i32 55, i32 5) // CHECK: {{%[^ ]*}} = or i32 [[CALL]], 55 return __atomic_or_fetch(p, 55, memory_order_seq_cst); } int test_atomic_xor_fetch(int *p) { // CHECK: test_atomic_xor_fetch - // CHECK: [[CALL:%[^ ]*]] = call i32 @__atomic_fetch_xor_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_xor_4(i8* {{%[0-9]+}}, i32 55, i32 5) // CHECK: {{%[^ ]*}} = xor i32 [[CALL]], 55 return __atomic_xor_fetch(p, 55, memory_order_seq_cst); } int test_atomic_nand_fetch(int *p) { // CHECK: test_atomic_nand_fetch - // CHECK: [[CALL:%[^ ]*]] = call i32 @__atomic_fetch_nand_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_nand_4(i8* {{%[0-9]+}}, i32 55, i32 5) // FIXME: We should not be 
checking optimized IR. It changes independently of clang. // FIXME-CHECK: [[AND:%[^ ]*]] = and i32 [[CALL]], 55 // FIXME-CHECK: {{%[^ ]*}} = xor i32 [[AND]], -1 diff --git a/clang/test/CodeGenCXX/atomicinit.cpp b/clang/test/CodeGenCXX/atomicinit.cpp index 657ade588fd5d..85ec74593fe04 100644 --- a/clang/test/CodeGenCXX/atomicinit.cpp +++ b/clang/test/CodeGenCXX/atomicinit.cpp @@ -31,7 +31,7 @@ _Atomic(B) b; // CHECK-LABEL: define void @_Z11atomic_initR1Ai void atomic_init(A& a, int i) { // CHECK-NOT: atomic - // CHECK: call void @_ZN1BC1Ei + // CHECK: tail call void @_ZN1BC1Ei __c11_atomic_init(&b, B(i)); // CHECK-NEXT: ret void } diff --git a/clang/test/CodeGenCXX/auto-var-init.cpp b/clang/test/CodeGenCXX/auto-var-init.cpp index 9cd71bdfd1a7d..a2cb2c8352b69 100644 --- a/clang/test/CodeGenCXX/auto-var-init.cpp +++ b/clang/test/CodeGenCXX/auto-var-init.cpp @@ -645,7 +645,7 @@ TEST_UNINIT(smallpartinit, smallpartinit); // ZERO-LABEL: @test_smallpartinit_uninit() // ZERO-O0: call void @llvm.memset{{.*}}, i8 0, // ZERO-O1-LEGACY: store i16 0, i16* %uninit, align 2 -// ZERO-O1-NEWPM: store i16 0, i16* %uninit, align 2 +// ZERO-O1-NEWPM: store i16 42, i16* %uninit, align 2 TEST_BRACES(smallpartinit, smallpartinit); // CHECK-LABEL: @test_smallpartinit_braces() @@ -718,7 +718,7 @@ TEST_UNINIT(paddednullinit, paddednullinit); // PATTERN-LABEL: @test_paddednullinit_uninit() // PATTERN-O0: call void @llvm.memcpy{{.*}} @__const.test_paddednullinit_uninit.uninit // PATTERN-O1-LEGACY: store i64 [[I64]], i64* %uninit, align 8 -// PATTERN-O1-NEWPM: store i64 [[I64]], i64* %uninit, align 8 +// PATTERN-O1-NEWPM: store i64 2863311360, i64* %uninit, align 8 // ZERO-LABEL: @test_paddednullinit_uninit() // ZERO-O0: call void @llvm.memset{{.*}}, i8 0, // ZERO-O1: store i64 0, i64* %uninit, align 8 @@ -1344,7 +1344,10 @@ TEST_UNINIT(virtualderived, virtualderived); // ZERO-LABEL: @test_virtualderived_uninit() // ZERO-O0: call void @llvm.memset{{.*}}, i8 0, // ZERO-O1-LEGACY: call void 
@llvm.memset{{.*}}, i8 0, -// ZERO-O1-NEWPM: call void @llvm.memset{{.*}}, i8 0, +// ZERO-O1-NEWPM: [[FIELD1:%.*]] = getelementptr inbounds %struct.virtualderived, %struct.virtualderived* %uninit, i64 0, i32 1, i32 0, i32 0 +// ZERO-O1-NEWPM: [[FIELD0:%.*]] = getelementptr inbounds %struct.virtualderived, %struct.virtualderived* %uninit, i64 0, i32 0, i32 0 +// ZERO-O1-NEWPM: store i32 (...)** bitcast (i8** getelementptr inbounds ({ [7 x i8*], [5 x i8*] }, { [7 x i8*], [5 x i8*] }* @_ZTV14virtualderived, i64 0, inrange i32 0, i64 5) to i32 (...)**), i32 (...)*** [[FIELD0]], align 8 +// ZERO-O1-NEWPM: store i32 (...)** bitcast (i8** getelementptr inbounds ({ [7 x i8*], [5 x i8*] }, { [7 x i8*], [5 x i8*] }* @_ZTV14virtualderived, i64 0, inrange i32 1, i64 3) to i32 (...)**), i32 (...)*** [[FIELD1]], align 8 TEST_BRACES(virtualderived, virtualderived); // CHECK-LABEL: @test_virtualderived_braces() diff --git a/clang/test/CodeGenCXX/discard-name-values.cpp b/clang/test/CodeGenCXX/discard-name-values.cpp index 91328a4ddade3..aa30dae7501bd 100644 --- a/clang/test/CodeGenCXX/discard-name-values.cpp +++ b/clang/test/CodeGenCXX/discard-name-values.cpp @@ -11,11 +11,11 @@ bool test(bool pred) { if (pred) { // DISCARDVALUE: 2: - // DISCARDVALUE-NEXT: call void @branch() + // DISCARDVALUE-NEXT: tail call void @branch() // DISCARDVALUE-NEXT: br label %3 // CHECK: if.then: - // CHECK-NEXT: call void @branch() + // CHECK-NEXT: tail call void @branch() // CHECK-NEXT: br label %if.end branch(); } diff --git a/clang/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp b/clang/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp index a07114dce7d07..c99df0e88b420 100644 --- a/clang/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp +++ b/clang/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp @@ -13,7 +13,7 @@ T* test0() { return dynamic_cast((B*)0); } T* test1(V* x) { return &dynamic_cast(*x); } // CHECK-LABEL: define dso_local %struct.T* @"?test1@@YAPAUT@@PAUV@@@Z"(%struct.V* %x) // CHECK: 
[[CAST:%.*]] = bitcast %struct.V* %x to i8* -// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTDynamicCast(i8* [[CAST]], i32 0, i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUV@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 1) +// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* [[CAST]], i32 0, i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUV@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 1) // CHECK-NEXT: [[RET:%.*]] = bitcast i8* [[CALL]] to %struct.T* // CHECK-NEXT: ret %struct.T* [[RET]] @@ -25,7 +25,7 @@ T* test2(A* x) { return &dynamic_cast(*x); } // CHECK-NEXT: [[VBOFFP:%.*]] = getelementptr inbounds i32, i32* [[VBTBL]], i32 1 // CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4 // CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[CAST]], i32 [[VBOFFS]] -// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTDynamicCast(i8* [[ADJ]], i32 [[VBOFFS]], i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUA@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 1) +// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* [[ADJ]], i32 [[VBOFFS]], i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUA@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 1) // CHECK-NEXT: [[RET:%.*]] = bitcast i8* [[CALL]] to %struct.T* // CHECK-NEXT: ret %struct.T* [[RET]] @@ -39,14 +39,14 @@ T* test3(B* x) { return &dynamic_cast(*x); } // CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4 // CHECK-NEXT: [[DELTA:%.*]] = add nsw i32 [[VBOFFS]], 4 // CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[VOIDP]], i32 [[DELTA]] -// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTDynamicCast(i8* [[ADJ]], i32 [[DELTA]], i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUB@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 1) +// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* [[ADJ]], i32 [[DELTA]], i8* bitcast 
(%rtti.TypeDescriptor7* @"??_R0?AUB@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 1) // CHECK-NEXT: [[RET:%.*]] = bitcast i8* [[CALL]] to %struct.T* // CHECK-NEXT: ret %struct.T* [[RET]] T* test4(V* x) { return dynamic_cast(x); } // CHECK-LABEL: define dso_local %struct.T* @"?test4@@YAPAUT@@PAUV@@@Z"(%struct.V* %x) // CHECK: [[CAST:%.*]] = bitcast %struct.V* %x to i8* -// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTDynamicCast(i8* [[CAST]], i32 0, i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUV@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0) +// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* [[CAST]], i32 0, i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUV@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0) // CHECK-NEXT: [[RET:%.*]] = bitcast i8* [[CALL]] to %struct.T* // CHECK-NEXT: ret %struct.T* [[RET]] @@ -60,7 +60,7 @@ T* test5(A* x) { return dynamic_cast(x); } // CHECK-NEXT: [[VBOFFP:%.*]] = getelementptr inbounds i32, i32* [[VBTBL]], i32 1 // CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4 // CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[VOIDP]], i32 [[VBOFFS]] -// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTDynamicCast(i8* nonnull [[ADJ]], i32 [[VBOFFS]], i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUA@@@8" to i8*), i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0) +// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* nonnull [[ADJ]], i32 [[VBOFFS]], i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUA@@@8" to i8*), i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0) // CHECK-NEXT: [[RES:%.*]] = bitcast i8* [[CALL]] to %struct.T* // CHECK-NEXT: br label // CHECK: [[RET:%.*]] = phi %struct.T* @@ -78,7 +78,7 @@ T* test6(B* x) { return dynamic_cast(x); } // CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4 // CHECK-NEXT: [[DELTA:%.*]] = add nsw 
i32 [[VBOFFS]], 4 // CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[CAST]], i32 [[DELTA]] -// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTDynamicCast(i8* [[ADJ]], i32 [[DELTA]], i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUB@@@8" to i8*), i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0) +// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* [[ADJ]], i32 [[DELTA]], i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUB@@@8" to i8*), i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0) // CHECK-NEXT: [[RES:%.*]] = bitcast i8* [[CALL]] to %struct.T* // CHECK-NEXT: br label // CHECK: [[RET:%.*]] = phi %struct.T* @@ -87,7 +87,7 @@ T* test6(B* x) { return dynamic_cast(x); } void* test7(V* x) { return dynamic_cast(x); } // CHECK-LABEL: define dso_local i8* @"?test7@@YAPAXPAUV@@@Z"(%struct.V* %x) // CHECK: [[CAST:%.*]] = bitcast %struct.V* %x to i8* -// CHECK-NEXT: [[RET:%.*]] = call i8* @__RTCastToVoid(i8* [[CAST]]) +// CHECK-NEXT: [[RET:%.*]] = tail call i8* @__RTCastToVoid(i8* [[CAST]]) // CHECK-NEXT: ret i8* [[RET]] void* test8(A* x) { return dynamic_cast(x); } @@ -100,7 +100,7 @@ void* test8(A* x) { return dynamic_cast(x); } // CHECK-NEXT: [[VBOFFP:%.*]] = getelementptr inbounds i32, i32* [[VBTBL]], i32 1 // CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4 // CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[VOIDP]], i32 [[VBOFFS]] -// CHECK-NEXT: [[RES:%.*]] = call i8* @__RTCastToVoid(i8* nonnull [[ADJ]]) +// CHECK-NEXT: [[RES:%.*]] = tail call i8* @__RTCastToVoid(i8* nonnull [[ADJ]]) // CHECK-NEXT: br label // CHECK: [[RET:%.*]] = phi i8* // CHECK-NEXT: ret i8* [[RET]] @@ -117,7 +117,7 @@ void* test9(B* x) { return dynamic_cast(x); } // CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4 // CHECK-NEXT: [[DELTA:%.*]] = add nsw i32 [[VBOFFS]], 4 // CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[CAST]], i32 [[DELTA]] -// CHECK-NEXT: [[CALL:%.*]] = 
call i8* @__RTCastToVoid(i8* [[ADJ]]) +// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTCastToVoid(i8* [[ADJ]]) // CHECK-NEXT: br label // CHECK: [[RET:%.*]] = phi i8* // CHECK-NEXT: ret i8* [[RET]] diff --git a/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp b/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp index f3bd7e6fd6c80..848e280cd9fe0 100644 --- a/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp +++ b/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp @@ -25,10 +25,10 @@ const std::type_info* test2_typeid() { return &typeid(&a); } const std::type_info* test3_typeid() { return &typeid(*fn()); } // CHECK-LABEL: define dso_local %struct.type_info* @"?test3_typeid@@YAPBUtype_info@@XZ"() -// CHECK: [[CALL:%.*]] = call %struct.A* @"?fn@@YAPAUA@@XZ"() +// CHECK: [[CALL:%.*]] = tail call %struct.A* @"?fn@@YAPAUA@@XZ"() // CHECK-NEXT: [[CMP:%.*]] = icmp eq %struct.A* [[CALL]], null // CHECK-NEXT: br i1 [[CMP]] -// CHECK: call i8* @__RTtypeid(i8* null) +// CHECK: tail call i8* @__RTtypeid(i8* null) // CHECK-NEXT: unreachable // CHECK: [[THIS:%.*]] = bitcast %struct.A* [[CALL]] to i8* // CHECK-NEXT: [[VBTBLP:%.*]] = getelementptr %struct.A, %struct.A* [[CALL]], i32 0, i32 0 @@ -36,7 +36,7 @@ const std::type_info* test3_typeid() { return &typeid(*fn()); } // CHECK-NEXT: [[VBSLOT:%.*]] = getelementptr inbounds i32, i32* [[VBTBL]], i32 1 // CHECK-NEXT: [[VBASE_OFFS:%.*]] = load i32, i32* [[VBSLOT]], align 4 // CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[THIS]], i32 [[VBASE_OFFS]] -// CHECK-NEXT: [[RT:%.*]] = call i8* @__RTtypeid(i8* nonnull [[ADJ]]) +// CHECK-NEXT: [[RT:%.*]] = tail call i8* @__RTtypeid(i8* nonnull [[ADJ]]) // CHECK-NEXT: [[RET:%.*]] = bitcast i8* [[RT]] to %struct.type_info* // CHECK-NEXT: ret %struct.type_info* [[RET]] @@ -46,7 +46,7 @@ const std::type_info* test4_typeid() { return &typeid(b); } const std::type_info* test5_typeid() { return &typeid(v); } // CHECK: define dso_local %struct.type_info* @"?test5_typeid@@YAPBUtype_info@@XZ"() -// 
CHECK: [[RT:%.*]] = call i8* @__RTtypeid(i8* bitcast (%struct.V* @"?v@@3UV@@A" to i8*)) +// CHECK: [[RT:%.*]] = tail call i8* @__RTtypeid(i8* bitcast (%struct.V* @"?v@@3UV@@A" to i8*)) // CHECK-NEXT: [[RET:%.*]] = bitcast i8* [[RT]] to %struct.type_info* // CHECK-NEXT: ret %struct.type_info* [[RET]] diff --git a/clang/test/CodeGenCXX/nrvo.cpp b/clang/test/CodeGenCXX/nrvo.cpp index 74a5af765d130..aab26890ea988 100644 --- a/clang/test/CodeGenCXX/nrvo.cpp +++ b/clang/test/CodeGenCXX/nrvo.cpp @@ -33,13 +33,13 @@ X test0() { // CHECK-LABEL: define void @_Z5test1b( // CHECK-EH-LABEL: define void @_Z5test1b( X test1(bool B) { - // CHECK: call {{.*}} @_ZN1XC1Ev + // CHECK: tail call {{.*}} @_ZN1XC1Ev // CHECK-NEXT: ret void X x; if (B) return (x); return x; - // CHECK-EH: call {{.*}} @_ZN1XC1Ev + // CHECK-EH: tail call {{.*}} @_ZN1XC1Ev // CHECK-EH-NEXT: ret void } @@ -130,7 +130,7 @@ X test2(bool B) { // CHECK-LABEL: define void @_Z5test3b X test3(bool B) { - // CHECK: call {{.*}} @_ZN1XC1Ev + // CHECK: tail call {{.*}} @_ZN1XC1Ev // CHECK-NOT: call {{.*}} @_ZN1XC1ERKS_ // CHECK: call {{.*}} @_ZN1XC1Ev // CHECK: call {{.*}} @_ZN1XC1ERKS_ @@ -148,14 +148,14 @@ extern "C" void exit(int) throw(); // CHECK-LABEL: define void @_Z5test4b X test4(bool B) { { - // CHECK: call {{.*}} @_ZN1XC1Ev + // CHECK: tail call {{.*}} @_ZN1XC1Ev X x; // CHECK: br i1 if (B) return x; } - // CHECK: call {{.*}} @_ZN1XD1Ev - // CHECK: call void @exit(i32 1) + // CHECK: tail call {{.*}} @_ZN1XD1Ev + // CHECK: tail call void @exit(i32 1) exit(1); } @@ -191,7 +191,7 @@ X test6() { // CHECK-LABEL: define void @_Z5test7b X test7(bool b) { - // CHECK: call {{.*}} @_ZN1XC1Ev + // CHECK: tail call {{.*}} @_ZN1XC1Ev // CHECK-NEXT: ret if (b) { X x; @@ -202,7 +202,7 @@ X test7(bool b) { // CHECK-LABEL: define void @_Z5test8b X test8(bool b) { - // CHECK: call {{.*}} @_ZN1XC1Ev + // CHECK: tail call {{.*}} @_ZN1XC1Ev // CHECK-NEXT: ret if (b) { X x; @@ -218,6 +218,6 @@ Y test9() { } // CHECK-LABEL: define 
linkonce_odr void @_ZN1YIiE1fEv -// CHECK: call {{.*}} @_ZN1YIiEC1Ev +// CHECK: tail call {{.*}} @_ZN1YIiEC1Ev // CHECK-EH-03: attributes [[NR_NUW]] = { noreturn nounwind } diff --git a/clang/test/CodeGenCXX/stack-reuse.cpp b/clang/test/CodeGenCXX/stack-reuse.cpp index 35dcb5b349c3e..8325604391ae2 100644 --- a/clang/test/CodeGenCXX/stack-reuse.cpp +++ b/clang/test/CodeGenCXX/stack-reuse.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple armv7-unknown-linux-gnueabihf %s -o - -emit-llvm -O2 | FileCheck %s +// RUN: %clang_cc1 -triple armv7-unknown-linux-gnueabihf %s -o - -emit-llvm -O1 | FileCheck %s // Stack should be reused when possible, no need to allocate two separate slots // if they have disjoint lifetime. diff --git a/clang/test/CodeGenCXX/wasm-args-returns.cpp b/clang/test/CodeGenCXX/wasm-args-returns.cpp index c547eb85390da..5718223f9f740 100644 --- a/clang/test/CodeGenCXX/wasm-args-returns.cpp +++ b/clang/test/CodeGenCXX/wasm-args-returns.cpp @@ -19,8 +19,8 @@ test(one_field); // CHECK: define double @_Z7forward9one_field(double returned %{{.*}}) // // CHECK: define void @_Z14test_one_fieldv() -// CHECK: %[[call:.*]] = call double @_Z13def_one_fieldv() -// CHECK: call void @_Z3use9one_field(double %[[call]]) +// CHECK: %[[call:.*]] = tail call double @_Z13def_one_fieldv() +// CHECK: tail call void @_Z3use9one_field(double %[[call]]) // CHECK: ret void // // CHECK: declare void @_Z3use9one_field(double) @@ -82,8 +82,8 @@ test(empty); // CHECK: define void @_Z7forward5empty() // // CHECK: define void @_Z10test_emptyv() -// CHECK: call void @_Z9def_emptyv() -// CHECK: call void @_Z3use5empty() +// CHECK: tail call void @_Z9def_emptyv() +// CHECK: tail call void @_Z3use5empty() // CHECK: ret void // // CHECK: declare void @_Z3use5empty() @@ -96,8 +96,8 @@ test(one_bitfield); // CHECK: define i32 @_Z7forward12one_bitfield(i32 returned %{{.*}}) // // CHECK: define void @_Z17test_one_bitfieldv() -// CHECK: %[[call:.*]] = call i32 @_Z16def_one_bitfieldv() -// CHECK: 
call void @_Z3use12one_bitfield(i32 %[[call]]) +// CHECK: %[[call:.*]] = tail call i32 @_Z16def_one_bitfieldv() +// CHECK: tail call void @_Z3use12one_bitfield(i32 %[[call]]) // CHECK: ret void // // CHECK: declare void @_Z3use12one_bitfield(i32) diff --git a/clang/test/CodeGenObjCXX/arc-blocks.mm b/clang/test/CodeGenObjCXX/arc-blocks.mm index d29491ed077ea..24697cf1bd377 100644 --- a/clang/test/CodeGenObjCXX/arc-blocks.mm +++ b/clang/test/CodeGenObjCXX/arc-blocks.mm @@ -122,7 +122,7 @@ void foo() { // CHECK: call void @__clang_call_terminate( // CHECK-O1-LABEL: define linkonce_odr hidden void @__copy_helper_block_ea8_32s40r48w56c15_ZTSN5test12S0E60c15_ZTSN5test12S0E( -// CHECK-O1: call void @llvm.objc.release({{.*}}) {{.*}} !clang.imprecise_release +// CHECK-O1: tail call void @llvm.objc.release({{.*}}) {{.*}} !clang.imprecise_release // CHECK-NOEXCP: define linkonce_odr hidden void @__copy_helper_block_8_32s40r48w56c15_ZTSN5test12S0E60c15_ZTSN5test12S0E( // CHECK: define linkonce_odr hidden void @__destroy_helper_block_ea8_32s40r48w56c15_ZTSN5test12S0E60c15_ZTSN5test12S0E( @@ -170,8 +170,8 @@ void foo() { // CHECK: call void @__clang_call_terminate( // CHECK-O1-LABEL: define linkonce_odr hidden void @__destroy_helper_block_ea8_32s40r48w56c15_ZTSN5test12S0E60c15_ZTSN5test12S0E( -// CHECK-O1: call void @llvm.objc.release({{.*}}) {{.*}} !clang.imprecise_release -// CHECK-O1: call void @llvm.objc.release({{.*}}) {{.*}} !clang.imprecise_release +// CHECK-O1: tail call void @llvm.objc.release({{.*}}) {{.*}} !clang.imprecise_release +// CHECK-O1: tail call void @llvm.objc.release({{.*}}) {{.*}} !clang.imprecise_release // CHECK-NOEXCP: define linkonce_odr hidden void @__destroy_helper_block_8_32s40r48w56c15_ZTSN5test12S0E60c15_ZTSN5test12S0E( namespace { diff --git a/clang/test/CodeGenObjCXX/nrvo.mm b/clang/test/CodeGenObjCXX/nrvo.mm index a02b38b820a3e..1ad5f79ad12ea 100644 --- a/clang/test/CodeGenObjCXX/nrvo.mm +++ b/clang/test/CodeGenObjCXX/nrvo.mm @@ -14,7 +14,7 @@ 
@implementation NRVO // CHECK: define internal void @"\01-[NRVO getNRVO]" - (X)getNRVO { X x; - // CHECK: call void @_ZN1XC1Ev + // CHECK: tail call void @_ZN1XC1Ev // CHECK-NEXT: ret void return x; } @@ -24,7 +24,7 @@ X blocksNRVO() { return ^{ // CHECK-LABEL: define internal void @___Z10blocksNRVOv_block_invoke X x; - // CHECK: call void @_ZN1XC1Ev + // CHECK: tail call void @_ZN1XC1Ev // CHECK-NEXT: ret void return x; }() ; diff --git a/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_error.c b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_error.c index 020912a4965de..c4a4cf3d97526 100644 --- a/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_error.c +++ b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_error.c @@ -1,16 +1,16 @@ -// Test CF+LF are properly handled along with quoted, multi-line #error -// RUN: %clang_cc1 -DOTHER -print-dependency-directives-minimized-source %s 2>&1 | FileCheck %s - -#ifndef TEST -#error "message \ - more message \ - even more" -#endif - -#ifdef OTHER -#include -#endif - -// CHECK: #ifdef OTHER -// CHECK-NEXT: #include -// CHECK-NEXT: #endif +// Test CF+LF are properly handled along with quoted, multi-line #error +// RUN: %clang_cc1 -DOTHER -print-dependency-directives-minimized-source %s 2>&1 | FileCheck %s + +#ifndef TEST +#error "message \ + more message \ + even more" +#endif + +#ifdef OTHER +#include +#endif + +// CHECK: #ifdef OTHER +// CHECK-NEXT: #include +// CHECK-NEXT: #endif diff --git a/clang/test/PCH/no-escaping-block-tail-calls.cpp b/clang/test/PCH/no-escaping-block-tail-calls.cpp index bf197267d67d4..5ae8108f387d0 100644 --- a/clang/test/PCH/no-escaping-block-tail-calls.cpp +++ b/clang/test/PCH/no-escaping-block-tail-calls.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -x c++-header -triple x86_64-apple-darwin11 -emit-pch -O2 -fblocks -fno-escaping-block-tail-calls -o %t %S/no-escaping-block-tail-calls.h -// RUN: %clang_cc1 -triple 
x86_64-apple-darwin11 -include-pch %t -emit-llvm -O2 -fblocks -fno-escaping-block-tail-calls -o - %s | FileCheck %s +// RUN: %clang_cc1 -x c++-header -triple x86_64-apple-darwin11 -emit-pch -O1 -fblocks -fno-escaping-block-tail-calls -o %t %S/no-escaping-block-tail-calls.h +// RUN: %clang_cc1 -triple x86_64-apple-darwin11 -include-pch %t -emit-llvm -O1 -fblocks -fno-escaping-block-tail-calls -o - %s | FileCheck %s // Check that -fno-escaping-block-tail-calls doesn't disable tail-call // optimization if the block is non-escaping. diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index 7fe03f72305b1..f73e4b42dd4bf 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -151,6 +151,10 @@ class PassBuilder { /// Optimize quickly without destroying debuggability. /// + /// FIXME: The current and historical behavior of this level does *not* + /// agree with this goal, but we would like to move toward this goal in the + /// future. + /// /// This level is tuned to produce a result from the optimizer as quickly /// as possible and to avoid destroying debuggability. This tends to result /// in a very good development mode where the compiled code will be @@ -160,9 +164,9 @@ class PassBuilder { /// debugging of the resulting binary. /// /// As an example, complex loop transformations such as versioning, - /// vectorization, or fusion don't make sense here due to the degree to - /// which the executed code differs from the source code, and the compile time - /// cost. + /// vectorization, or fusion might not make sense here due to the degree to + /// which the executed code would differ from the source code, and the + /// potential compile time cost. 
O1, /// Optimize for fast execution as much as possible without triggering diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index b22921b2b878c..5896dbf5bb985 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -400,25 +400,21 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); // Hoisting of scalars and load expressions. - if (Level > O1) { - if (EnableGVNHoist) - FPM.addPass(GVNHoistPass()); - - // Global value numbering based sinking. - if (EnableGVNSink) { - FPM.addPass(GVNSinkPass()); - FPM.addPass(SimplifyCFGPass()); - } + if (EnableGVNHoist) + FPM.addPass(GVNHoistPass()); + + // Global value numbering based sinking. + if (EnableGVNSink) { + FPM.addPass(GVNSinkPass()); + FPM.addPass(SimplifyCFGPass()); } // Speculative execution if the target has divergent branches; otherwise nop. - if (Level > O1) { - FPM.addPass(SpeculativeExecutionPass()); + FPM.addPass(SpeculativeExecutionPass()); - // Optimize based on known information about branches, and cleanup afterward. - FPM.addPass(JumpThreadingPass()); - FPM.addPass(CorrelatedValuePropagationPass()); - } + // Optimize based on known information about branches, and cleanup afterward. + FPM.addPass(JumpThreadingPass()); + FPM.addPass(CorrelatedValuePropagationPass()); FPM.addPass(SimplifyCFGPass()); if (Level == O3) FPM.addPass(AggressiveInstCombinePass()); @@ -432,12 +428,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // For PGO use pipeline, try to optimize memory intrinsics such as memcpy // using the size value profile. Don't perform this when optimizing for size. if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse && - !isOptimizingForSize(Level) && Level > O1) + !isOptimizingForSize(Level)) FPM.addPass(PGOMemOPSizeOpt()); - // TODO: Investigate the cost/benefit of tail call elimination on debugging. 
- if (Level > O1) - FPM.addPass(TailCallElimPass()); + FPM.addPass(TailCallElimPass()); FPM.addPass(SimplifyCFGPass()); // Form canonically associated expression trees, and simplify the trees using @@ -464,7 +458,6 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // Rotate Loop - disable header duplication at -Oz LPM1.addPass(LoopRotatePass(Level != Oz)); - // TODO: Investigate promotion cap for O1. LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); LPM1.addPass(SimpleLoopUnswitchPass()); LPM2.addPass(IndVarSimplifyPass()); @@ -532,21 +525,18 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // Re-consider control flow based optimizations after redundancy elimination, // redo DCE, etc. - if (Level > O1) { - FPM.addPass(JumpThreadingPass()); - FPM.addPass(CorrelatedValuePropagationPass()); - FPM.addPass(DSEPass()); - FPM.addPass(createFunctionToLoopPassAdaptor( - LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), - EnableMSSALoopDependency, DebugLogging)); - } + FPM.addPass(JumpThreadingPass()); + FPM.addPass(CorrelatedValuePropagationPass()); + FPM.addPass(DSEPass()); + FPM.addPass(createFunctionToLoopPassAdaptor( + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), + EnableMSSALoopDependency, DebugLogging)); for (auto &C : ScalarOptimizerLateEPCallbacks) C(FPM, Level); // Finally, do an expensive DCE pass to catch all the dead code exposed by // the simplifications and basic cleanup after all the simplifications. - // TODO: Investigate if this is too expensive. 
FPM.addPass(ADCEPass()); FPM.addPass(SimplifyCFGPass()); FPM.addPass(InstCombinePass()); diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 81424229c3bfe..5314a8219b1ea 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -320,26 +320,19 @@ void PassManagerBuilder::addFunctionSimplificationPasses( legacy::PassManagerBase &MPM) { // Start of function pass. // Break up aggregate allocas, using SSAUpdater. - assert(OptLevel >= 1 && "Calling function optimizer with no optimization level!"); MPM.add(createSROAPass()); MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies - - if (OptLevel > 1) { - if (EnableGVNHoist) - MPM.add(createGVNHoistPass()); - if (EnableGVNSink) { - MPM.add(createGVNSinkPass()); - MPM.add(createCFGSimplificationPass()); - } + if (EnableGVNHoist) + MPM.add(createGVNHoistPass()); + if (EnableGVNSink) { + MPM.add(createGVNSinkPass()); + MPM.add(createCFGSimplificationPass()); } - if (OptLevel > 1) { - // Speculative execution if the target has divergent branches; otherwise nop. - MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); - - MPM.add(createJumpThreadingPass()); // Thread jumps. - MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals - } + // Speculative execution if the target has divergent branches; otherwise nop. + MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); + MPM.add(createJumpThreadingPass()); // Thread jumps. + MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals MPM.add(createCFGSimplificationPass()); // Merge & remove BBs // Combine silly seq's if (OptLevel > 2) @@ -353,10 +346,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses( if (SizeLevel == 0) MPM.add(createPGOMemOPSizeOptLegacyPass()); - // TODO: Investigate the cost/benefit of tail call elimination on debugging. 
- if (OptLevel > 1) - MPM.add(createTailCallEliminationPass()); // Eliminate tail calls - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createTailCallEliminationPass()); // Eliminate tail calls + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs MPM.add(createReassociatePass()); // Reassociate expressions // Begin the loop pass pipeline. @@ -369,7 +360,6 @@ void PassManagerBuilder::addFunctionSimplificationPasses( } // Rotate Loop - disable header duplication at -Oz MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); - // TODO: Investigate promotion cap for O1. MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); if (EnableSimpleLoopUnswitch) MPM.add(createSimpleLoopUnswitchLegacyPass()); @@ -412,19 +402,16 @@ void PassManagerBuilder::addFunctionSimplificationPasses( // opened up by them. addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); - if (OptLevel > 1) { - MPM.add(createJumpThreadingPass()); // Thread jumps - MPM.add(createCorrelatedValuePropagationPass()); - MPM.add(createDeadStoreEliminationPass()); // Delete dead stores - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); - } + MPM.add(createJumpThreadingPass()); // Thread jumps + MPM.add(createCorrelatedValuePropagationPass()); + MPM.add(createDeadStoreEliminationPass()); // Delete dead stores + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); addExtensionsToPM(EP_ScalarOptimizerLate, MPM); if (RerollLoops) MPM.add(createLoopRerollPass()); - // TODO: Investigate if this is too expensive at O1. MPM.add(createAggressiveDCEPass()); // Delete dead instructions MPM.add(createCFGSimplificationPass()); // Merge & remove BBs // Clean up after everything. @@ -912,8 +899,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // LTO provides additional opportunities for tailcall elimination due to // link-time inlining, and visibility of nocapture attribute. 
- if (OptLevel > 1) - PM.add(createTailCallEliminationPass()); + PM.add(createTailCallEliminationPass()); // Infer attributes on declarations, call sites, arguments, etc. PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture. diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll index 682c0679fa240..859f848d228c4 100644 --- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll +++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll @@ -3,17 +3,17 @@ ; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-use-native -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NATIVE %s ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos -; GCN-POSTLINK: call fast float @_Z3sinf( -; GCN-POSTLINK: call fast float @_Z3cosf( +; GCN-POSTLINK: tail call fast float @_Z3sinf( +; GCN-POSTLINK: tail call fast float @_Z3cosf( ; GCN-PRELINK: call fast float @_Z6sincosfPf( -; GCN-NATIVE: call fast float @_Z10native_sinf( -; GCN-NATIVE: call fast float @_Z10native_cosf( +; GCN-NATIVE: tail call fast float @_Z10native_sinf( +; GCN-NATIVE: tail call fast float @_Z10native_cosf( define amdgpu_kernel void @test_sincos(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z3sinf(float %tmp) + %call = tail call fast float @_Z3sinf(float %tmp) store float %call, float addrspace(1)* %a, align 4 - %call2 = call fast float @_Z3cosf(float %tmp) + %call2 = tail call fast float @_Z3cosf(float %tmp) %arrayidx3 = getelementptr inbounds float, float addrspace(1)* %a, i64 1 store float %call2, float addrspace(1)* %arrayidx3, align 4 ret void @@ -24,17 +24,17 @@ declare float @_Z3sinf(float) declare float @_Z3cosf(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v2 -; GCN-POSTLINK: call fast <2 x float> @_Z3sinDv2_f( -; GCN-POSTLINK: call fast <2 x float> @_Z3cosDv2_f( +; GCN-POSTLINK: tail call fast <2 x float> @_Z3sinDv2_f( +; GCN-POSTLINK: 
tail call fast <2 x float> @_Z3cosDv2_f( ; GCN-PRELINK: call fast <2 x float> @_Z6sincosDv2_fPS_( -; GCN-NATIVE: call fast <2 x float> @_Z10native_sinDv2_f( -; GCN-NATIVE: call fast <2 x float> @_Z10native_cosDv2_f( +; GCN-NATIVE: tail call fast <2 x float> @_Z10native_sinDv2_f( +; GCN-NATIVE: tail call fast <2 x float> @_Z10native_cosDv2_f( define amdgpu_kernel void @test_sincos_v2(<2 x float> addrspace(1)* nocapture %a) { entry: %tmp = load <2 x float>, <2 x float> addrspace(1)* %a, align 8 - %call = call fast <2 x float> @_Z3sinDv2_f(<2 x float> %tmp) + %call = tail call fast <2 x float> @_Z3sinDv2_f(<2 x float> %tmp) store <2 x float> %call, <2 x float> addrspace(1)* %a, align 8 - %call2 = call fast <2 x float> @_Z3cosDv2_f(<2 x float> %tmp) + %call2 = tail call fast <2 x float> @_Z3cosDv2_f(<2 x float> %tmp) %arrayidx3 = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %a, i64 1 store <2 x float> %call2, <2 x float> addrspace(1)* %arrayidx3, align 8 ret void @@ -45,20 +45,20 @@ declare <2 x float> @_Z3sinDv2_f(<2 x float>) declare <2 x float> @_Z3cosDv2_f(<2 x float>) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v3 -; GCN-POSTLINK: call fast <3 x float> @_Z3sinDv3_f( -; GCN-POSTLINK: call fast <3 x float> @_Z3cosDv3_f( +; GCN-POSTLINK: tail call fast <3 x float> @_Z3sinDv3_f( +; GCN-POSTLINK: tail call fast <3 x float> @_Z3cosDv3_f( ; GCN-PRELINK: call fast <3 x float> @_Z6sincosDv3_fPS_( -; GCN-NATIVE: call fast <3 x float> @_Z10native_sinDv3_f( -; GCN-NATIVE: call fast <3 x float> @_Z10native_cosDv3_f( +; GCN-NATIVE: tail call fast <3 x float> @_Z10native_sinDv3_f( +; GCN-NATIVE: tail call fast <3 x float> @_Z10native_cosDv3_f( define amdgpu_kernel void @test_sincos_v3(<3 x float> addrspace(1)* nocapture %a) { entry: %castToVec4 = bitcast <3 x float> addrspace(1)* %a to <4 x float> addrspace(1)* %loadVec4 = load <4 x float>, <4 x float> addrspace(1)* %castToVec4, align 16 %extractVec4 = shufflevector <4 x float> %loadVec4, <4 x 
float> undef, <3 x i32> - %call = call fast <3 x float> @_Z3sinDv3_f(<3 x float> %extractVec4) + %call = tail call fast <3 x float> @_Z3sinDv3_f(<3 x float> %extractVec4) %extractVec6 = shufflevector <3 x float> %call, <3 x float> undef, <4 x i32> store <4 x float> %extractVec6, <4 x float> addrspace(1)* %castToVec4, align 16 - %call11 = call fast <3 x float> @_Z3cosDv3_f(<3 x float> %extractVec4) + %call11 = tail call fast <3 x float> @_Z3cosDv3_f(<3 x float> %extractVec4) %arrayidx12 = getelementptr inbounds <3 x float>, <3 x float> addrspace(1)* %a, i64 1 %extractVec13 = shufflevector <3 x float> %call11, <3 x float> undef, <4 x i32> %storetmp14 = bitcast <3 x float> addrspace(1)* %arrayidx12 to <4 x float> addrspace(1)* @@ -71,17 +71,17 @@ declare <3 x float> @_Z3sinDv3_f(<3 x float>) declare <3 x float> @_Z3cosDv3_f(<3 x float>) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v4 -; GCN-POSTLINK: call fast <4 x float> @_Z3sinDv4_f( -; GCN-POSTLINK: call fast <4 x float> @_Z3cosDv4_f( +; GCN-POSTLINK: tail call fast <4 x float> @_Z3sinDv4_f( +; GCN-POSTLINK: tail call fast <4 x float> @_Z3cosDv4_f( ; GCN-PRELINK: call fast <4 x float> @_Z6sincosDv4_fPS_( -; GCN-NATIVE: call fast <4 x float> @_Z10native_sinDv4_f( -; GCN-NATIVE: call fast <4 x float> @_Z10native_cosDv4_f( +; GCN-NATIVE: tail call fast <4 x float> @_Z10native_sinDv4_f( +; GCN-NATIVE: tail call fast <4 x float> @_Z10native_cosDv4_f( define amdgpu_kernel void @test_sincos_v4(<4 x float> addrspace(1)* nocapture %a) { entry: %tmp = load <4 x float>, <4 x float> addrspace(1)* %a, align 16 - %call = call fast <4 x float> @_Z3sinDv4_f(<4 x float> %tmp) + %call = tail call fast <4 x float> @_Z3sinDv4_f(<4 x float> %tmp) store <4 x float> %call, <4 x float> addrspace(1)* %a, align 16 - %call2 = call fast <4 x float> @_Z3cosDv4_f(<4 x float> %tmp) + %call2 = tail call fast <4 x float> @_Z3cosDv4_f(<4 x float> %tmp) %arrayidx3 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %a, i64 1 
store <4 x float> %call2, <4 x float> addrspace(1)* %arrayidx3, align 16 ret void @@ -92,17 +92,17 @@ declare <4 x float> @_Z3sinDv4_f(<4 x float>) declare <4 x float> @_Z3cosDv4_f(<4 x float>) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v8 -; GCN-POSTLINK: call fast <8 x float> @_Z3sinDv8_f( -; GCN-POSTLINK: call fast <8 x float> @_Z3cosDv8_f( +; GCN-POSTLINK: tail call fast <8 x float> @_Z3sinDv8_f( +; GCN-POSTLINK: tail call fast <8 x float> @_Z3cosDv8_f( ; GCN-PRELINK: call fast <8 x float> @_Z6sincosDv8_fPS_( -; GCN-NATIVE: call fast <8 x float> @_Z10native_sinDv8_f( -; GCN-NATIVE: call fast <8 x float> @_Z10native_cosDv8_f( +; GCN-NATIVE: tail call fast <8 x float> @_Z10native_sinDv8_f( +; GCN-NATIVE: tail call fast <8 x float> @_Z10native_cosDv8_f( define amdgpu_kernel void @test_sincos_v8(<8 x float> addrspace(1)* nocapture %a) { entry: %tmp = load <8 x float>, <8 x float> addrspace(1)* %a, align 32 - %call = call fast <8 x float> @_Z3sinDv8_f(<8 x float> %tmp) + %call = tail call fast <8 x float> @_Z3sinDv8_f(<8 x float> %tmp) store <8 x float> %call, <8 x float> addrspace(1)* %a, align 32 - %call2 = call fast <8 x float> @_Z3cosDv8_f(<8 x float> %tmp) + %call2 = tail call fast <8 x float> @_Z3cosDv8_f(<8 x float> %tmp) %arrayidx3 = getelementptr inbounds <8 x float>, <8 x float> addrspace(1)* %a, i64 1 store <8 x float> %call2, <8 x float> addrspace(1)* %arrayidx3, align 32 ret void @@ -113,17 +113,17 @@ declare <8 x float> @_Z3sinDv8_f(<8 x float>) declare <8 x float> @_Z3cosDv8_f(<8 x float>) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v16 -; GCN-POSTLINK: call fast <16 x float> @_Z3sinDv16_f( -; GCN-POSTLINK: call fast <16 x float> @_Z3cosDv16_f( +; GCN-POSTLINK: tail call fast <16 x float> @_Z3sinDv16_f( +; GCN-POSTLINK: tail call fast <16 x float> @_Z3cosDv16_f( ; GCN-PRELINK: call fast <16 x float> @_Z6sincosDv16_fPS_( -; GCN-NATIVE: call fast <16 x float> @_Z10native_sinDv16_f( -; GCN-NATIVE: call fast <16 x float> 
@_Z10native_cosDv16_f( +; GCN-NATIVE: tail call fast <16 x float> @_Z10native_sinDv16_f( +; GCN-NATIVE: tail call fast <16 x float> @_Z10native_cosDv16_f( define amdgpu_kernel void @test_sincos_v16(<16 x float> addrspace(1)* nocapture %a) { entry: %tmp = load <16 x float>, <16 x float> addrspace(1)* %a, align 64 - %call = call fast <16 x float> @_Z3sinDv16_f(<16 x float> %tmp) + %call = tail call fast <16 x float> @_Z3sinDv16_f(<16 x float> %tmp) store <16 x float> %call, <16 x float> addrspace(1)* %a, align 64 - %call2 = call fast <16 x float> @_Z3cosDv16_f(<16 x float> %tmp) + %call2 = tail call fast <16 x float> @_Z3cosDv16_f(<16 x float> %tmp) %arrayidx3 = getelementptr inbounds <16 x float>, <16 x float> addrspace(1)* %a, i64 1 store <16 x float> %call2, <16 x float> addrspace(1)* %arrayidx3, align 64 ret void @@ -137,7 +137,7 @@ declare <16 x float> @_Z3cosDv16_f(<16 x float>) ; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a define amdgpu_kernel void @test_native_recip(float addrspace(1)* nocapture %a) { entry: - %call = call fast float @_Z12native_recipf(float 3.000000e+00) + %call = tail call fast float @_Z12native_recipf(float 3.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -148,7 +148,7 @@ declare float @_Z12native_recipf(float) ; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a define amdgpu_kernel void @test_half_recip(float addrspace(1)* nocapture %a) { entry: - %call = call fast float @_Z10half_recipf(float 3.000000e+00) + %call = tail call fast float @_Z10half_recipf(float 3.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -160,7 +160,7 @@ declare float @_Z10half_recipf(float) define amdgpu_kernel void @test_native_divide(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z13native_divideff(float %tmp, float 3.000000e+00) + %call = tail call fast float @_Z13native_divideff(float %tmp, float 
3.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -172,7 +172,7 @@ declare float @_Z13native_divideff(float, float) define amdgpu_kernel void @test_half_divide(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z11half_divideff(float %tmp, float 3.000000e+00) + %call = tail call fast float @_Z11half_divideff(float %tmp, float 3.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -184,7 +184,7 @@ declare float @_Z11half_divideff(float, float) define amdgpu_kernel void @test_pow_0f(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z3powff(float %tmp, float 0.000000e+00) + %call = tail call fast float @_Z3powff(float %tmp, float 0.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -196,7 +196,7 @@ declare float @_Z3powff(float, float) define amdgpu_kernel void @test_pow_0i(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z3powff(float %tmp, float 0.000000e+00) + %call = tail call fast float @_Z3powff(float %tmp, float 0.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -208,7 +208,7 @@ define amdgpu_kernel void @test_pow_1f(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = call fast float @_Z3powff(float %tmp, float 1.000000e+00) + %call = tail call fast float @_Z3powff(float %tmp, float 1.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -220,7 +220,7 @@ define amdgpu_kernel void @test_pow_1i(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = call fast float 
@_Z3powff(float %tmp, float 1.000000e+00) + %call = tail call fast float @_Z3powff(float %tmp, float 1.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -231,7 +231,7 @@ entry: define amdgpu_kernel void @test_pow_2f(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z3powff(float %tmp, float 2.000000e+00) + %call = tail call fast float @_Z3powff(float %tmp, float 2.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -242,7 +242,7 @@ entry: define amdgpu_kernel void @test_pow_2i(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z3powff(float %tmp, float 2.000000e+00) + %call = tail call fast float @_Z3powff(float %tmp, float 2.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -254,7 +254,7 @@ define amdgpu_kernel void @test_pow_m1f(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = call fast float @_Z3powff(float %tmp, float -1.000000e+00) + %call = tail call fast float @_Z3powff(float %tmp, float -1.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -266,31 +266,31 @@ define amdgpu_kernel void @test_pow_m1i(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = call fast float @_Z3powff(float %tmp, float -1.000000e+00) + %call = tail call fast float @_Z3powff(float %tmp, float -1.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_half -; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float 5.000000e-01) -; GCN-PRELINK: %__pow2sqrt = call fast float @_Z4sqrtf(float %tmp) +; GCN-POSTLINK: 
tail call fast float @_Z3powff(float %tmp, float 5.000000e-01) +; GCN-PRELINK: %__pow2sqrt = tail call fast float @_Z4sqrtf(float %tmp) define amdgpu_kernel void @test_pow_half(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = call fast float @_Z3powff(float %tmp, float 5.000000e-01) + %call = tail call fast float @_Z3powff(float %tmp, float 5.000000e-01) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_mhalf -; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float -5.000000e-01) -; GCN-PRELINK: %__pow2rsqrt = call fast float @_Z5rsqrtf(float %tmp) +; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float -5.000000e-01) +; GCN-PRELINK: %__pow2rsqrt = tail call fast float @_Z5rsqrtf(float %tmp) define amdgpu_kernel void @test_pow_mhalf(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = call fast float @_Z3powff(float %tmp, float -5.000000e-01) + %call = tail call fast float @_Z3powff(float %tmp, float -5.000000e-01) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -305,7 +305,7 @@ define amdgpu_kernel void @test_pow_c(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = call fast float @_Z3powff(float %tmp, float 1.100000e+01) + %call = tail call fast float @_Z3powff(float %tmp, float 1.100000e+01) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -320,7 +320,7 @@ define amdgpu_kernel void @test_powr_c(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 
4 - %call = call fast float @_Z4powrff(float %tmp, float 1.100000e+01) + %call = tail call fast float @_Z4powrff(float %tmp, float 1.100000e+01) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -337,7 +337,7 @@ define amdgpu_kernel void @test_pown_c(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = call fast float @_Z4pownfi(float %tmp, i32 11) + %call = tail call fast float @_Z4pownfi(float %tmp, i32 11) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -345,11 +345,11 @@ entry: declare float @_Z4pownfi(float, i32) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow -; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float 1.013000e+03) -; GCN-PRELINK: %__fabs = call fast float @_Z4fabsf(float %tmp) -; GCN-PRELINK: %__log2 = call fast float @_Z4log2f(float %__fabs) +; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float 1.013000e+03) +; GCN-PRELINK: %__fabs = tail call fast float @_Z4fabsf(float %tmp) +; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs) ; GCN-PRELINK: %__ylogx = fmul fast float %__log2, 1.013000e+03 -; GCN-PRELINK: %__exp2 = call fast float @_Z4exp2f(float %__ylogx) +; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx) ; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32 ; GCN-PRELINK: %__pow_sign = and i32 %[[r0]], -2147483648 ; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32 @@ -359,39 +359,39 @@ declare float @_Z4pownfi(float, i32) define amdgpu_kernel void @test_pow(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z3powff(float %tmp, float 1.013000e+03) + %call = tail call fast float @_Z3powff(float %tmp, float 1.013000e+03) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr -; 
GCN-POSTLINK: call fast float @_Z4powrff(float %tmp, float %tmp1) -; GCN-PRELINK: %__log2 = call fast float @_Z4log2f(float %tmp) +; GCN-POSTLINK: tail call fast float @_Z4powrff(float %tmp, float %tmp1) +; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %tmp) ; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %tmp1 -; GCN-PRELINK: %__exp2 = call fast float @_Z4exp2f(float %__ylogx) +; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx) ; GCN-PRELINK: store float %__exp2, float addrspace(1)* %a, align 4 -; GCN-NATIVE: %__log2 = call fast float @_Z11native_log2f(float %tmp) +; GCN-NATIVE: %__log2 = tail call fast float @_Z11native_log2f(float %tmp) ; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1 -; GCN-NATIVE: %__exp2 = call fast float @_Z11native_exp2f(float %__ylogx) +; GCN-NATIVE: %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx) ; GCN-NATIVE: store float %__exp2, float addrspace(1)* %a, align 4 define amdgpu_kernel void @test_powr(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4 - %call = call fast float @_Z4powrff(float %tmp, float %tmp1) + %call = tail call fast float @_Z4powrff(float %tmp, float %tmp1) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown -; GCN-POSTLINK: call fast float @_Z4pownfi(float %tmp, i32 %conv) +; GCN-POSTLINK: tail call fast float @_Z4pownfi(float %tmp, i32 %conv) ; GCN-PRELINK: %conv = fptosi float %tmp1 to i32 -; GCN-PRELINK: %__fabs = call fast float @_Z4fabsf(float %tmp) -; GCN-PRELINK: %__log2 = call fast float @_Z4log2f(float %__fabs) +; GCN-PRELINK: %__fabs = tail call fast float @_Z4fabsf(float %tmp) +; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs) ; GCN-PRELINK: %pownI2F = sitofp i32 %conv to float ; 
GCN-PRELINK: %__ylogx = fmul fast float %__log2, %pownI2F -; GCN-PRELINK: %__exp2 = call fast float @_Z4exp2f(float %__ylogx) +; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx) ; GCN-PRELINK: %__yeven = shl i32 %conv, 31 ; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32 ; GCN-PRELINK: %__pow_sign = and i32 %__yeven, %[[r0]] @@ -405,7 +405,7 @@ entry: %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4 %conv = fptosi float %tmp1 to i32 - %call = call fast float @_Z4pownfi(float %tmp, i32 %conv) + %call = tail call fast float @_Z4pownfi(float %tmp, i32 %conv) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -417,7 +417,7 @@ define amdgpu_kernel void @test_rootn_1(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = call fast float @_Z5rootnfi(float %tmp, i32 1) + %call = tail call fast float @_Z5rootnfi(float %tmp, i32 1) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -425,23 +425,23 @@ entry: declare float @_Z5rootnfi(float, i32) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_2 -; GCN-POSTLINK: call fast float @_Z5rootnfi(float %tmp, i32 2) -; GCN-PRELINK: %__rootn2sqrt = call fast float @_Z4sqrtf(float %tmp) +; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 2) +; GCN-PRELINK: %__rootn2sqrt = tail call fast float @_Z4sqrtf(float %tmp) define amdgpu_kernel void @test_rootn_2(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z5rootnfi(float %tmp, i32 2) + %call = tail call fast float @_Z5rootnfi(float %tmp, i32 2) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_3 -; GCN-POSTLINK: call fast float @_Z5rootnfi(float %tmp, i32 3) -; 
GCN-PRELINK: %__rootn2cbrt = call fast float @_Z4cbrtf(float %tmp) +; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 3) +; GCN-PRELINK: %__rootn2cbrt = tail call fast float @_Z4cbrtf(float %tmp) define amdgpu_kernel void @test_rootn_3(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z5rootnfi(float %tmp, i32 3) + %call = tail call fast float @_Z5rootnfi(float %tmp, i32 3) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -451,18 +451,18 @@ entry: define amdgpu_kernel void @test_rootn_m1(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z5rootnfi(float %tmp, i32 -1) + %call = tail call fast float @_Z5rootnfi(float %tmp, i32 -1) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m2 -; GCN-POSTLINK: call fast float @_Z5rootnfi(float %tmp, i32 -2) -; GCN-PRELINK: %__rootn2rsqrt = call fast float @_Z5rsqrtf(float %tmp) +; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 -2) +; GCN-PRELINK: %__rootn2rsqrt = tail call fast float @_Z5rsqrtf(float %tmp) define amdgpu_kernel void @test_rootn_m2(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z5rootnfi(float %tmp, i32 -2) + %call = tail call fast float @_Z5rootnfi(float %tmp, i32 -2) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -472,7 +472,7 @@ entry: define amdgpu_kernel void @test_fma_0x(float addrspace(1)* nocapture %a, float %y) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z3fmafff(float 0.000000e+00, float %tmp, float %y) + %call = tail call fast float @_Z3fmafff(float 0.000000e+00, float %tmp, float %y) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -484,7 +484,7 @@ declare float @_Z3fmafff(float, float, float) 
define amdgpu_kernel void @test_fma_x0(float addrspace(1)* nocapture %a, float %y) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z3fmafff(float %tmp, float 0.000000e+00, float %y) + %call = tail call fast float @_Z3fmafff(float %tmp, float 0.000000e+00, float %y) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -494,7 +494,7 @@ entry: define amdgpu_kernel void @test_mad_0x(float addrspace(1)* nocapture %a, float %y) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z3madfff(float 0.000000e+00, float %tmp, float %y) + %call = tail call fast float @_Z3madfff(float 0.000000e+00, float %tmp, float %y) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -506,7 +506,7 @@ declare float @_Z3madfff(float, float, float) define amdgpu_kernel void @test_mad_x0(float addrspace(1)* nocapture %a, float %y) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z3madfff(float %tmp, float 0.000000e+00, float %y) + %call = tail call fast float @_Z3madfff(float %tmp, float 0.000000e+00, float %y) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -516,7 +516,7 @@ entry: define amdgpu_kernel void @test_fma_x1y(float addrspace(1)* nocapture %a, float %y) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z3fmafff(float %tmp, float 1.000000e+00, float %y) + %call = tail call fast float @_Z3fmafff(float %tmp, float 1.000000e+00, float %y) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -526,7 +526,7 @@ entry: define amdgpu_kernel void @test_fma_1xy(float addrspace(1)* nocapture %a, float %y) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z3fmafff(float 1.000000e+00, float %tmp, float %y) + %call = tail call fast float @_Z3fmafff(float 1.000000e+00, float %tmp, float %y) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -538,17 
+538,17 @@ entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 %tmp1 = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z3fmafff(float %tmp, float %tmp1, float 0.000000e+00) + %call = tail call fast float @_Z3fmafff(float %tmp, float %tmp1, float 0.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp -; GCN-NATIVE: call fast float @_Z10native_expf(float %tmp) +; GCN-NATIVE: tail call fast float @_Z10native_expf(float %tmp) define amdgpu_kernel void @test_use_native_exp(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z3expf(float %tmp) + %call = tail call fast float @_Z3expf(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -556,11 +556,11 @@ entry: declare float @_Z3expf(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp2 -; GCN-NATIVE: call fast float @_Z11native_exp2f(float %tmp) +; GCN-NATIVE: tail call fast float @_Z11native_exp2f(float %tmp) define amdgpu_kernel void @test_use_native_exp2(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z4exp2f(float %tmp) + %call = tail call fast float @_Z4exp2f(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -568,11 +568,11 @@ entry: declare float @_Z4exp2f(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp10 -; GCN-NATIVE: call fast float @_Z12native_exp10f(float %tmp) +; GCN-NATIVE: tail call fast float @_Z12native_exp10f(float %tmp) define amdgpu_kernel void @test_use_native_exp10(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z5exp10f(float %tmp) + %call = tail call fast float @_Z5exp10f(float %tmp) store float 
%call, float addrspace(1)* %a, align 4 ret void } @@ -580,11 +580,11 @@ entry: declare float @_Z5exp10f(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log -; GCN-NATIVE: call fast float @_Z10native_logf(float %tmp) +; GCN-NATIVE: tail call fast float @_Z10native_logf(float %tmp) define amdgpu_kernel void @test_use_native_log(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z3logf(float %tmp) + %call = tail call fast float @_Z3logf(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -592,11 +592,11 @@ entry: declare float @_Z3logf(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log2 -; GCN-NATIVE: call fast float @_Z11native_log2f(float %tmp) +; GCN-NATIVE: tail call fast float @_Z11native_log2f(float %tmp) define amdgpu_kernel void @test_use_native_log2(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z4log2f(float %tmp) + %call = tail call fast float @_Z4log2f(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -604,11 +604,11 @@ entry: declare float @_Z4log2f(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log10 -; GCN-NATIVE: call fast float @_Z12native_log10f(float %tmp) +; GCN-NATIVE: tail call fast float @_Z12native_log10f(float %tmp) define amdgpu_kernel void @test_use_native_log10(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z5log10f(float %tmp) + %call = tail call fast float @_Z5log10f(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -617,36 +617,36 @@ declare float @_Z5log10f(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_powr ; GCN-NATIVE: %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4 -; GCN-NATIVE: %__log2 = call fast float @_Z11native_log2f(float %tmp) +; 
GCN-NATIVE: %__log2 = tail call fast float @_Z11native_log2f(float %tmp) ; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1 -; GCN-NATIVE: %__exp2 = call fast float @_Z11native_exp2f(float %__ylogx) +; GCN-NATIVE: %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx) ; GCN-NATIVE: store float %__exp2, float addrspace(1)* %a, align 4 define amdgpu_kernel void @test_use_native_powr(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4 - %call = call fast float @_Z4powrff(float %tmp, float %tmp1) + %call = tail call fast float @_Z4powrff(float %tmp, float %tmp1) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sqrt -; GCN-NATIVE: call fast float @_Z11native_sqrtf(float %tmp) +; GCN-NATIVE: tail call fast float @_Z11native_sqrtf(float %tmp) define amdgpu_kernel void @test_use_native_sqrt(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z4sqrtf(float %tmp) + %call = tail call fast float @_Z4sqrtf(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64 -; GCN: call fast double @_Z4sqrtd(double %tmp) +; GCN: tail call fast double @_Z4sqrtd(double %tmp) define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64(double addrspace(1)* nocapture %a) { entry: %tmp = load double, double addrspace(1)* %a, align 8 - %call = call fast double @_Z4sqrtd(double %tmp) + %call = tail call fast double @_Z4sqrtd(double %tmp) store double %call, double addrspace(1)* %a, align 8 ret void } @@ -655,11 +655,11 @@ declare float @_Z4sqrtf(float) declare double @_Z4sqrtd(double) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_rsqrt -; GCN-NATIVE: call 
fast float @_Z12native_rsqrtf(float %tmp) +; GCN-NATIVE: tail call fast float @_Z12native_rsqrtf(float %tmp) define amdgpu_kernel void @test_use_native_rsqrt(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z5rsqrtf(float %tmp) + %call = tail call fast float @_Z5rsqrtf(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -667,11 +667,11 @@ entry: declare float @_Z5rsqrtf(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_tan -; GCN-NATIVE: call fast float @_Z10native_tanf(float %tmp) +; GCN-NATIVE: tail call fast float @_Z10native_tanf(float %tmp) define amdgpu_kernel void @test_use_native_tan(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = call fast float @_Z3tanf(float %tmp) + %call = tail call fast float @_Z3tanf(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -679,14 +679,14 @@ entry: declare float @_Z3tanf(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sincos -; GCN-NATIVE: call float @_Z10native_sinf(float %tmp) -; GCN-NATIVE: call float @_Z10native_cosf(float %tmp) +; GCN-NATIVE: tail call float @_Z10native_sinf(float %tmp) +; GCN-NATIVE: tail call float @_Z10native_cosf(float %tmp) define amdgpu_kernel void @test_use_native_sincos(float addrspace(1)* %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp1 = addrspacecast float addrspace(1)* %arrayidx1 to float* - %call = call fast float @_Z6sincosfPf(float %tmp, float* %tmp1) + %call = tail call fast float @_Z6sincosfPf(float %tmp, float* %tmp1) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -703,10 +703,10 @@ define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 a entry: %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)* %tmp1 = addrspacecast i8 
addrspace(1)* %tmp to i8* - %tmp2 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0 - %tmp3 = call %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) - %tmp4 = call i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0 - call void @__commit_read_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4) + %tmp2 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0 + %tmp3 = tail call %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) + %tmp4 = tail call i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0 + tail call void @__commit_read_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4) ret void } @@ -725,10 +725,10 @@ define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 entry: %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)* %tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8* - %tmp2 = call i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0 - %tmp3 = call %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) #0 - %tmp4 = call i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0 - call void @__commit_write_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4) #0 + %tmp2 = tail call i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0 + %tmp3 = tail call %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) #0 + %tmp4 = tail call i32 
@__write_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0 + tail call void @__commit_write_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4) #0 ret void } @@ -755,31 +755,31 @@ declare void @__commit_write_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_ define amdgpu_kernel void @test_pipe_size(%opencl.pipe_t addrspace(1)* %p1, i8 addrspace(1)* %ptr1, %opencl.pipe_t addrspace(1)* %p2, i16 addrspace(1)* %ptr2, %opencl.pipe_t addrspace(1)* %p4, i32 addrspace(1)* %ptr4, %opencl.pipe_t addrspace(1)* %p8, i64 addrspace(1)* %ptr8, %opencl.pipe_t addrspace(1)* %p16, <2 x i64> addrspace(1)* %ptr16, %opencl.pipe_t addrspace(1)* %p32, <4 x i64> addrspace(1)* %ptr32, %opencl.pipe_t addrspace(1)* %p64, <8 x i64> addrspace(1)* %ptr64, %opencl.pipe_t addrspace(1)* %p128, <16 x i64> addrspace(1)* %ptr128, %opencl.pipe_t addrspace(1)* %pu, %struct.S addrspace(1)* %ptru) local_unnamed_addr #0 { entry: %tmp = addrspacecast i8 addrspace(1)* %ptr1 to i8* - %tmp1 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p1, i8* %tmp, i32 1, i32 1) #0 + %tmp1 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p1, i8* %tmp, i32 1, i32 1) #0 %tmp2 = bitcast i16 addrspace(1)* %ptr2 to i8 addrspace(1)* %tmp3 = addrspacecast i8 addrspace(1)* %tmp2 to i8* - %tmp4 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p2, i8* %tmp3, i32 2, i32 2) #0 + %tmp4 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p2, i8* %tmp3, i32 2, i32 2) #0 %tmp5 = bitcast i32 addrspace(1)* %ptr4 to i8 addrspace(1)* %tmp6 = addrspacecast i8 addrspace(1)* %tmp5 to i8* - %tmp7 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p4, i8* %tmp6, i32 4, i32 4) #0 + %tmp7 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p4, i8* %tmp6, i32 4, i32 4) #0 %tmp8 = bitcast i64 addrspace(1)* %ptr8 to i8 addrspace(1)* %tmp9 = addrspacecast i8 addrspace(1)* %tmp8 to i8* - %tmp10 
= call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p8, i8* %tmp9, i32 8, i32 8) #0 + %tmp10 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p8, i8* %tmp9, i32 8, i32 8) #0 %tmp11 = bitcast <2 x i64> addrspace(1)* %ptr16 to i8 addrspace(1)* %tmp12 = addrspacecast i8 addrspace(1)* %tmp11 to i8* - %tmp13 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p16, i8* %tmp12, i32 16, i32 16) #0 + %tmp13 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p16, i8* %tmp12, i32 16, i32 16) #0 %tmp14 = bitcast <4 x i64> addrspace(1)* %ptr32 to i8 addrspace(1)* %tmp15 = addrspacecast i8 addrspace(1)* %tmp14 to i8* - %tmp16 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p32, i8* %tmp15, i32 32, i32 32) #0 + %tmp16 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p32, i8* %tmp15, i32 32, i32 32) #0 %tmp17 = bitcast <8 x i64> addrspace(1)* %ptr64 to i8 addrspace(1)* %tmp18 = addrspacecast i8 addrspace(1)* %tmp17 to i8* - %tmp19 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p64, i8* %tmp18, i32 64, i32 64) #0 + %tmp19 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p64, i8* %tmp18, i32 64, i32 64) #0 %tmp20 = bitcast <16 x i64> addrspace(1)* %ptr128 to i8 addrspace(1)* %tmp21 = addrspacecast i8 addrspace(1)* %tmp20 to i8* - %tmp22 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p128, i8* %tmp21, i32 128, i32 128) #0 + %tmp22 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p128, i8* %tmp21, i32 128, i32 128) #0 %tmp23 = bitcast %struct.S addrspace(1)* %ptru to i8 addrspace(1)* %tmp24 = addrspacecast i8 addrspace(1)* %tmp23 to i8* - %tmp25 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %pu, i8* %tmp24, i32 400, i32 4) #0 + %tmp25 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %pu, i8* %tmp24, i32 400, i32 4) #0 ret void } diff --git a/llvm/test/Feature/optnone-opt.ll b/llvm/test/Feature/optnone-opt.ll index f706ade7934f4..ae0e1a48acc58 100644 --- 
a/llvm/test/Feature/optnone-opt.ll +++ b/llvm/test/Feature/optnone-opt.ll @@ -39,10 +39,16 @@ attributes #0 = { optnone noinline } ; IR passes run at -O1 and higher. ; OPT-O1-DAG: Skipping pass 'Aggressive Dead Code Elimination' ; OPT-O1-DAG: Skipping pass 'Combine redundant instructions' +; OPT-O1-DAG: Skipping pass 'Dead Store Elimination' ; OPT-O1-DAG: Skipping pass 'Early CSE' +; OPT-O1-DAG: Skipping pass 'Jump Threading' +; OPT-O1-DAG: Skipping pass 'MemCpy Optimization' ; OPT-O1-DAG: Skipping pass 'Reassociate expressions' ; OPT-O1-DAG: Skipping pass 'Simplify the CFG' ; OPT-O1-DAG: Skipping pass 'Sparse Conditional Constant Propagation' +; OPT-O1-DAG: Skipping pass 'SROA' +; OPT-O1-DAG: Skipping pass 'Tail Call Elimination' +; OPT-O1-DAG: Skipping pass 'Value Propagation' ; Additional IR passes run at -O2 and higher. ; OPT-O2O3-DAG: Skipping pass 'Global Value Numbering' diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll index e79a359277f6f..009f19e544c85 100644 --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -12,70 +12,66 @@ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O1 ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2 \ -; RUN: --check-prefix=CHECK-O23SZ +; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2 ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-O23SZ +; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-Os \ -; RUN: --check-prefix=CHECK-O23SZ +; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-Os ; RUN: opt 
-disable-verify -debug-pass-manager \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-Oz \ -; RUN: --check-prefix=CHECK-O23SZ +; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-Oz ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='lto-pre-link' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2 \ -; RUN: --check-prefix=CHECK-O2-LTO --check-prefix=CHECK-O23SZ +; RUN: --check-prefix=CHECK-O2-LTO ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-peephole='no-op-function' \ ; RUN: -passes='default' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-PEEPHOLE --check-prefix=CHECK-O23SZ +; RUN: --check-prefix=CHECK-EP-PEEPHOLE ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-late-loop-optimizations='no-op-loop' \ ; RUN: -passes='default' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-LOOP-LATE --check-prefix=CHECK-O23SZ +; RUN: --check-prefix=CHECK-EP-LOOP-LATE ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-loop-optimizer-end='no-op-loop' \ ; RUN: -passes='default' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-LOOP-END --check-prefix=CHECK-O23SZ +; RUN: --check-prefix=CHECK-EP-LOOP-END ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-scalar-optimizer-late='no-op-function' \ ; RUN: -passes='default' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-SCALAR-LATE --check-prefix=CHECK-O23SZ +; RUN: --check-prefix=CHECK-EP-SCALAR-LATE ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-cgscc-optimizer-late='no-op-cgscc' \ ; RUN: -passes='default' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O 
--check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-CGSCC-LATE --check-prefix=CHECK-O23SZ +; RUN: --check-prefix=CHECK-EP-CGSCC-LATE ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-vectorizer-start='no-op-function' \ ; RUN: -passes='default' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-VECTORIZER-START --check-prefix=CHECK-O23SZ +; RUN: --check-prefix=CHECK-EP-VECTORIZER-START ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-pipeline-start='no-op-module' \ ; RUN: -passes='default' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-PIPELINE-START --check-prefix=CHECK-O23SZ +; RUN: --check-prefix=CHECK-EP-PIPELINE-START ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-pipeline-start='no-op-module' \ ; RUN: -passes='lto-pre-link' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-PIPELINE-START --check-prefix=CHECK-O23SZ +; RUN: --check-prefix=CHECK-EP-PIPELINE-START ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-optimizer-last='no-op-function' \ ; RUN: -passes='default' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-OPTIMIZER-LAST --check-prefix=CHECK-O23SZ +; RUN: --check-prefix=CHECK-EP-OPTIMIZER-LAST ; CHECK-O: Running analysis: PassInstrumentationAnalysis ; CHECK-O-NEXT: Starting llvm::Module pass manager run. 
@@ -136,10 +132,10 @@ ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis -; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass -; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass -; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis -; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass +; CHECK-O-NEXT: Running pass: SpeculativeExecutionPass +; CHECK-O-NEXT: Running pass: JumpThreadingPass +; CHECK-O-NEXT: Running analysis: LazyValueAnalysis +; CHECK-O-NEXT: Running pass: CorrelatedValuePropagationPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O3-NEXT: AggressiveInstCombinePass ; CHECK-O-NEXT: Running pass: InstCombinePass @@ -147,7 +143,7 @@ ; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass -; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass +; CHECK-O-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ReassociatePass ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis @@ -184,10 +180,22 @@ ; CHECK-EP-LOOP-END-NEXT: Running pass: NoOpLoopPass ; CHECK-O-NEXT: Finished Loop pass manager run. 
; CHECK-O-NEXT: Running pass: SROA on foo -; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass -; CHECK-O23SZ-NEXT: Running pass: GVN -; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis -; CHECK-O23SZ-NEXT: Running analysis: PhiValuesAnalysis +; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass +; CHECK-Os-NEXT: Running pass: GVN +; CHECK-Os-NEXT: Running analysis: MemoryDependenceAnalysis +; CHECK-Os-NEXT: Running analysis: PhiValuesAnalysis +; CHECK-Oz-NEXT: Running pass: MergedLoadStoreMotionPass +; CHECK-Oz-NEXT: Running pass: GVN +; CHECK-Oz-NEXT: Running analysis: MemoryDependenceAnalysis +; CHECK-Oz-NEXT: Running analysis: PhiValuesAnalysis +; CHECK-O2-NEXT: Running pass: MergedLoadStoreMotionPass +; CHECK-O2-NEXT: Running pass: GVN +; CHECK-O2-NEXT: Running analysis: MemoryDependenceAnalysis +; CHECK-O2-NEXT: Running analysis: PhiValuesAnalysis +; CHECK-O3-NEXT: Running pass: MergedLoadStoreMotionPass +; CHECK-O3-NEXT: Running pass: GVN +; CHECK-O3-NEXT: Running analysis: MemoryDependenceAnalysis +; CHECK-O3-NEXT: Running analysis: PhiValuesAnalysis ; CHECK-O-NEXT: Running pass: MemCpyOptPass ; CHECK-O1-NEXT: Running analysis: MemoryDependenceAnalysis ; CHECK-O1-NEXT: Running analysis: PhiValuesAnalysis @@ -196,14 +204,14 @@ ; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass -; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass -; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass -; CHECK-O23SZ-NEXT: Running pass: DSEPass -; CHECK-O23SZ-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass{{.*}}> -; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run. -; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass -; CHECK-O23SZ-NEXT: Running pass: LCSSAPass -; CHECK-O23SZ-NEXT: Finished llvm::Function pass manager run. 
+; CHECK-O-NEXT: Running pass: JumpThreadingPass +; CHECK-O-NEXT: Running pass: CorrelatedValuePropagationPass +; CHECK-O-NEXT: Running pass: DSEPass +; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass{{.*}}> +; CHECK-O-NEXT: Starting llvm::Function pass manager run. +; CHECK-O-NEXT: Running pass: LoopSimplifyPass +; CHECK-O-NEXT: Running pass: LCSSAPass +; CHECK-O-NEXT: Finished llvm::Function pass manager run. ; CHECK-EP-SCALAR-LATE-NEXT: Running pass: NoOpFunctionPass ; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll index c93b360009b25..a0b4df044450f 100644 --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -13,19 +13,19 @@ ; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-O1 ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='thinlto-pre-link,name-anon-globals' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-O2 +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-O2 ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='thinlto-pre-link,name-anon-globals' -S -passes-ep-pipeline-start='no-op-module' %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-O3,CHECK-EP-PIPELINE-START +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-O3,CHECK-EP-PIPELINE-START ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='thinlto-pre-link,name-anon-globals' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Os,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-Os +; 
RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Os,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-Os ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='thinlto-pre-link,name-anon-globals' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Oz,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-Oz +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Oz,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-Oz ; RUN: opt -disable-verify -debug-pass-manager -new-pm-debug-info-for-profiling \ ; RUN: -passes='thinlto-pre-link,name-anon-globals' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-DIS,CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O2 +; RUN: | FileCheck %s --check-prefixes=CHECK-DIS,CHECK-O,CHECK-O2,CHECK-PRELINK-O,CHECK-PRELINK-O2 ; ; Postlink pipelines: ; RUN: opt -disable-verify -debug-pass-manager \ @@ -33,19 +33,19 @@ ; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,CHECK-POSTLINK-O,CHECK-POSTLINK-O1 ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-POSTLINK-O,CHECK-POSTLINK-O2 +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-POSTLINK-O,CHECK-POSTLINK-O2 ; RUN: opt -disable-verify -debug-pass-manager -passes-ep-pipeline-start='no-op-module' \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-POSTLINK-O,CHECK-POSTLINK-O3 +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-POSTLINK-O,CHECK-POSTLINK-O3 ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Os,CHECK-O23SZ,CHECK-POSTLINK-O,CHECK-POSTLINK-Os +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Os,CHECK-POSTLINK-O,CHECK-POSTLINK-Os ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck 
%s --check-prefixes=CHECK-O,CHECK-Oz,CHECK-O23SZ,CHECK-POSTLINK-O,CHECK-POSTLINK-Oz +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Oz,CHECK-POSTLINK-O,CHECK-POSTLINK-Oz ; RUN: opt -disable-verify -debug-pass-manager -new-pm-debug-info-for-profiling \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-POSTLINK-O,CHECK-POSTLINK-O2 +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-POSTLINK-O,CHECK-POSTLINK-O2 ; ; CHECK-O: Running analysis: PassInstrumentationAnalysis ; CHECK-O-NEXT: Starting llvm::Module pass manager run. @@ -112,17 +112,17 @@ ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis -; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass -; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass -; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis -; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass +; CHECK-O-NEXT: Running pass: SpeculativeExecutionPass +; CHECK-O-NEXT: Running pass: JumpThreadingPass +; CHECK-O-NEXT: Running analysis: LazyValueAnalysis +; CHECK-O-NEXT: Running pass: CorrelatedValuePropagationPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O3-NEXT: Running pass: AggressiveInstCombinePass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O1-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass -; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass +; CHECK-O-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ReassociatePass ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis @@ -180,14 +180,14 @@ ; CHECK-O-NEXT: Running pass: BDCEPass ; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis ; CHECK-O-NEXT: Running pass: InstCombinePass -; CHECK-O23SZ-NEXT: Running 
pass: JumpThreadingPass -; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass -; CHECK-O23SZ-NEXT: Running pass: DSEPass -; CHECK-O23SZ-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass{{.*}}> -; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run -; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass -; CHECK-O23SZ-NEXT: Running pass: LCSSAPass -; CHECK-O23SZ-NEXT: Finished llvm::Function pass manager run +; CHECK-O-NEXT: Running pass: JumpThreadingPass +; CHECK-O-NEXT: Running pass: CorrelatedValuePropagationPass +; CHECK-O-NEXT: Running pass: DSEPass +; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass{{.*}}> +; CHECK-O-NEXT: Starting llvm::Function pass manager run +; CHECK-O-NEXT: Running pass: LoopSimplifyPass +; CHECK-O-NEXT: Running pass: LCSSAPass +; CHECK-O-NEXT: Finished llvm::Function pass manager run ; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: SimplifyCFGPass diff --git a/llvm/test/Transforms/MemCpyOpt/lifetime.ll b/llvm/test/Transforms/MemCpyOpt/lifetime.ll index ad14bdd6df661..9ddf3f4f9c290 100644 --- a/llvm/test/Transforms/MemCpyOpt/lifetime.ll +++ b/llvm/test/Transforms/MemCpyOpt/lifetime.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -O2 -S | FileCheck %s +; RUN: opt < %s -O1 -S | FileCheck %s ; performCallSlotOptzn in MemCpy should not exchange the calls to ; @llvm.lifetime.start and @llvm.memcpy. 
diff --git a/llvm/test/Transforms/PhaseOrdering/simplifycfg-options.ll b/llvm/test/Transforms/PhaseOrdering/simplifycfg-options.ll index 6b3ba66c951eb..6934623463575 100644 --- a/llvm/test/Transforms/PhaseOrdering/simplifycfg-options.ll +++ b/llvm/test/Transforms/PhaseOrdering/simplifycfg-options.ll @@ -7,7 +7,7 @@ define i1 @PR33605(i32 %a, i32 %b, i32* %c) { ; ALL-LABEL: @PR33605( -; ALL-NEXT: entry: +; ALL-NEXT: for.body: ; ALL-NEXT: [[OR:%.*]] = or i32 [[B:%.*]], [[A:%.*]] ; ALL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 1 ; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 @@ -15,16 +15,16 @@ define i1 @PR33605(i32 %a, i32 %b, i32* %c) { ; ALL-NEXT: br i1 [[CMP]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; ALL: if.then: ; ALL-NEXT: store i32 [[OR]], i32* [[ARRAYIDX]], align 4 -; ALL-NEXT: call void @foo() +; ALL-NEXT: tail call void @foo() ; ALL-NEXT: br label [[IF_END]] ; ALL: if.end: -; ALL-NEXT: [[CHANGED_1_OFF0:%.*]] = phi i1 [ true, [[IF_THEN]] ], [ false, [[ENTRY:%.*]] ] +; ALL-NEXT: [[CHANGED_1_OFF0:%.*]] = phi i1 [ true, [[IF_THEN]] ], [ false, [[FOR_BODY:%.*]] ] ; ALL-NEXT: [[TMP1:%.*]] = load i32, i32* [[C]], align 4 ; ALL-NEXT: [[CMP_1:%.*]] = icmp eq i32 [[OR]], [[TMP1]] ; ALL-NEXT: br i1 [[CMP_1]], label [[IF_END_1:%.*]], label [[IF_THEN_1:%.*]] ; ALL: if.then.1: ; ALL-NEXT: store i32 [[OR]], i32* [[C]], align 4 -; ALL-NEXT: call void @foo() +; ALL-NEXT: tail call void @foo() ; ALL-NEXT: br label [[IF_END_1]] ; ALL: if.end.1: ; ALL-NEXT: [[CHANGED_1_OFF0_1:%.*]] = phi i1 [ true, [[IF_THEN_1]] ], [ [[CHANGED_1_OFF0]], [[IF_END]] ] diff --git a/llvm/test/Transforms/PhaseOrdering/two-shifts-by-sext.ll b/llvm/test/Transforms/PhaseOrdering/two-shifts-by-sext.ll index 82f5cfbc9d517..4d4a30e00eafb 100644 --- a/llvm/test/Transforms/PhaseOrdering/two-shifts-by-sext.ll +++ b/llvm/test/Transforms/PhaseOrdering/two-shifts-by-sext.ll @@ -74,7 +74,7 @@ define i32 @two_shifts_by_same_sext(i32 %val, i8 signext %len) { 
define i32 @two_shifts_by_sext_with_extra_use(i32 %val, i8 signext %len) { ; CHECK-LABEL: @two_shifts_by_sext_with_extra_use( ; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[LEN:%.*]] to i32 -; CHECK-NEXT: call void @use_int32(i32 [[CONV]]) +; CHECK-NEXT: tail call void @use_int32(i32 [[CONV]]) ; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[VAL:%.*]], [[CONV]] ; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[SHL]], [[CONV]] ; CHECK-NEXT: ret i32 [[SHR]] @@ -101,7 +101,7 @@ declare void @use_int32(i32) define i32 @two_shifts_by_same_sext_with_extra_use(i32 %val, i8 signext %len) { ; CHECK-LABEL: @two_shifts_by_same_sext_with_extra_use( ; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[LEN:%.*]] to i32 -; CHECK-NEXT: call void @use_int32(i32 [[CONV]]) +; CHECK-NEXT: tail call void @use_int32(i32 [[CONV]]) ; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[VAL:%.*]], [[CONV]] ; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[SHL]], [[CONV]] ; CHECK-NEXT: ret i32 [[SHR]] From 6db023b99baa2f483d052232a30ca01637f549fc Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 25 Nov 2019 16:31:33 -0800 Subject: [PATCH 005/591] [BPF] add "llvm." prefix to BPF internally created globals Currently, BPF backend creates some global variables with name like ::$ to carry certain information to BPF backend. With direct clang compilation, the following code in llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp is triggered and the above globals are emitted to the ELF file. (clang enabled this as opt flag -faddrsig is on by default.) if (TM.Options.EmitAddrsig) { // Emit address-significance attributes for all globals. OutStreamer->EmitAddrsig(); for (const GlobalValue &GV : M.global_values()) if (!GV.use_empty() && !GV.isThreadLocal() && !GV.hasDLLImportStorageClass() && !GV.getName().startswith("llvm.") && !GV.hasAtLeastLocalUnnamedAddr()) OutStreamer->EmitAddrsigSym(getSymbol(&GV)); } ... 
10162: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND tcp_sock:0:2048$0:117 10163: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND tcp_sock:0:2112$0:126:0 10164: 0000000000000000 0 NOTYPE GLOBAL DEFAULT UND tcp_sock:1:8$0:31:6 ... While in llc, those globals are not emited since EmitAddrsig default option is false for llc. The llc flag "-addrsig" can be used to enable the above code. This patch added "llvm." prefix to these internal globals so that they can be ignored in the above codes and possible other places. Differential Revision: https://reviews.llvm.org/D70703 --- .../Target/BPF/BPFAbstractMemberAccess.cpp | 8 ++- .../CodeGen/BPF/CORE/no-elf-ama-symbol.ll | 65 +++++++++++++++++++ 2 files changed, 71 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/BPF/CORE/no-elf-ama-symbol.ll diff --git a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp index 3af29a2e698b3..a28816cc87b7d 100644 --- a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp +++ b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp @@ -829,9 +829,13 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call, RecordAlignment); } - // Access key is the type name + reloc type + patched imm + access string, + // Access key is the + // "llvm." + type name + ":" + reloc type + ":" + patched imm + "$" + + // access string, // uniquely identifying one relocation. - AccessKey = TypeName + ":" + std::to_string(InfoKind) + ":" + + // The prefix "llvm." indicates this is a temporary global, which should + // not be emitted to ELF file. + AccessKey = "llvm." 
+ TypeName + ":" + std::to_string(InfoKind) + ":" + std::to_string(PatchImm) + "$" + AccessKey; return Base; diff --git a/llvm/test/CodeGen/BPF/CORE/no-elf-ama-symbol.ll b/llvm/test/CodeGen/BPF/CORE/no-elf-ama-symbol.ll new file mode 100644 index 0000000000000..8851c502b6f0b --- /dev/null +++ b/llvm/test/CodeGen/BPF/CORE/no-elf-ama-symbol.ll @@ -0,0 +1,65 @@ +; RUN: llc -march=bpfel -filetype=obj -o - %s | llvm-readelf -s | FileCheck -check-prefixes=CHECK %s +; RUN: llc -march=bpfeb -filetype=obj -o - %s | llvm-readelf -s | FileCheck -check-prefixes=CHECK %s +; RUN: llc -march=bpfel -filetype=obj -addrsig -o - %s | llvm-readelf -s | FileCheck -check-prefixes=CHECK %s +; RUN: llc -march=bpfeb -filetype=obj -addrsig -o - %s | llvm-readelf -s | FileCheck -check-prefixes=CHECK %s +; +; Source Code: +; struct tt { int a; } __attribute__((preserve_access_index)); +; int test(struct tt *arg) { +; return arg->a; +; } +; Compilation flag: +; clang -target bpf -O2 -g -S -emit-llvm t.c + +%struct.tt = type { i32 } + +; Function Attrs: nounwind readonly +define dso_local i32 @test(%struct.tt* readonly %arg) local_unnamed_addr #0 !dbg !7 { +entry: + call void @llvm.dbg.value(metadata %struct.tt* %arg, metadata !16, metadata !DIExpression()), !dbg !17 + %0 = tail call i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.tts(%struct.tt* %arg, i32 0, i32 0), !dbg !18, !llvm.preserve.access.index !12 + %1 = load i32, i32* %0, align 4, !dbg !18, !tbaa !19 + ret i32 %1, !dbg !24 +} + +; CHECK-NOT: llvm.tt:0:0$0:0 + +; Function Attrs: nounwind readnone +declare i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.tts(%struct.tt*, i32, i32) #1 + +; Function Attrs: nounwind readnone speculatable +declare void @llvm.dbg.value(metadata, metadata, metadata) #2 + +attributes #0 = { nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" 
"no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readnone speculatable} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.0 (https://github.com/llvm/llvm-project.git 947f9692440836dcb8d88b74b69dd379d85974ce)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/tmp/home/yhs/work/tests/bug") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 10.0.0 (https://github.com/llvm/llvm-project.git 947f9692440836dcb8d88b74b69dd379d85974ce)"} +!7 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 2, type: !8, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !15) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !11} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64) +!12 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "tt", file: !1, line: 1, size: 32, elements: !13) +!13 = !{!14} +!14 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !12, file: !1, line: 1, baseType: !10, size: 32) +!15 = !{!16} +!16 = !DILocalVariable(name: "arg", arg: 1, scope: !7, file: !1, line: 2, type: !11) +!17 = !DILocation(line: 0, scope: !7) +!18 = !DILocation(line: 3, column: 15, scope: !7) +!19 = !{!20, !21, i64 0} +!20 = !{!"tt", !21, i64 0} +!21 = !{!"int", !22, i64 0} +!22 = !{!"omnipotent char", !23, i64 0} +!23 = !{!"Simple C/C++ TBAA"} +!24 = 
!DILocation(line: 3, column: 3, scope: !7) From 92f1446b8b8a1031d1676df5f90d5b5ca69e425b Mon Sep 17 00:00:00 2001 From: "Wang, Pengfei" Date: Tue, 26 Nov 2019 13:40:28 +0800 Subject: [PATCH 006/591] [X86] Updated strict fp scalar tests and add fp80 tests for D68857, NFC. --- llvm/test/CodeGen/X86/fp-strict-scalar.ll | 12 +-- llvm/test/CodeGen/X86/fp80-strict-scalar.ll | 94 ++++++++------------- 2 files changed, 42 insertions(+), 64 deletions(-) diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar.ll b/llvm/test/CodeGen/X86/fp-strict-scalar.ll index 724095e8aca39..8813047636ed2 100644 --- a/llvm/test/CodeGen/X86/fp-strict-scalar.ll +++ b/llvm/test/CodeGen/X86/fp-strict-scalar.ll @@ -70,8 +70,8 @@ define double @fadd_f64(double %a, double %b) nounwind strictfp { ret double %ret } -define float @fadd_fsub_f32(float %a, float %b) nounwind strictfp { -; SSE-X86-LABEL: fadd_fsub_f32: +define float @fadd_f32(float %a, float %b) nounwind strictfp { +; SSE-X86-LABEL: fadd_f32: ; SSE-X86: # %bb.0: ; SSE-X86-NEXT: pushl %eax ; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero @@ -81,12 +81,12 @@ define float @fadd_fsub_f32(float %a, float %b) nounwind strictfp { ; SSE-X86-NEXT: popl %eax ; SSE-X86-NEXT: retl ; -; SSE-X64-LABEL: fadd_fsub_f32: +; SSE-X64-LABEL: fadd_f32: ; SSE-X64: # %bb.0: ; SSE-X64-NEXT: addss %xmm1, %xmm0 ; SSE-X64-NEXT: retq ; -; AVX-X86-LABEL: fadd_fsub_f32: +; AVX-X86-LABEL: fadd_f32: ; AVX-X86: # %bb.0: ; AVX-X86-NEXT: pushl %eax ; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero @@ -96,12 +96,12 @@ define float @fadd_fsub_f32(float %a, float %b) nounwind strictfp { ; AVX-X86-NEXT: popl %eax ; AVX-X86-NEXT: retl ; -; AVX-X64-LABEL: fadd_fsub_f32: +; AVX-X64-LABEL: fadd_f32: ; AVX-X64: # %bb.0: ; AVX-X64-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; AVX-X64-NEXT: retq ; -; X87-LABEL: fadd_fsub_f32: +; X87-LABEL: fadd_f32: ; X87: # %bb.0: ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fadds {{[0-9]+}}(%esp) diff --git 
a/llvm/test/CodeGen/X86/fp80-strict-scalar.ll b/llvm/test/CodeGen/X86/fp80-strict-scalar.ll index 1fc5d0196190d..2795008632204 100644 --- a/llvm/test/CodeGen/X86/fp80-strict-scalar.ll +++ b/llvm/test/CodeGen/X86/fp80-strict-scalar.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X86 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -O3 | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -O3 | FileCheck %s --check-prefixes=CHECK,X64 declare x86_fp80 @llvm.experimental.constrained.fadd.x86_fp80(x86_fp80, x86_fp80, metadata, metadata) declare x86_fp80 @llvm.experimental.constrained.fsub.x86_fp80(x86_fp80, x86_fp80, metadata, metadata) @@ -92,129 +92,107 @@ define x86_fp80 @fdiv_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp { ret x86_fp80 %ret } -define void @fpext_f32_to_fp80(float* %val, x86_fp80* %ret) nounwind strictfp { +define x86_fp80 @fpext_f32_to_fp80(float %a) nounwind strictfp { ; X86-LABEL: fpext_f32_to_fp80: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: flds (%ecx) -; X86-NEXT: fstpt (%eax) +; X86-NEXT: flds {{[0-9]+}}(%esp) ; X86-NEXT: retl ; ; X64-LABEL: fpext_f32_to_fp80: ; X64: # %bb.0: -; X64-NEXT: flds (%rdi) -; X64-NEXT: fstpt (%rsi) +; X64-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: flds -{{[0-9]+}}(%rsp) ; X64-NEXT: retq - %1 = load float, float* %val, align 4 - %res = call x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f32(float %1, + %ret = call x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f32(float %a, metadata !"fpexcept.strict") #0 - store x86_fp80 %res, x86_fp80* %ret, align 16 - ret void + ret x86_fp80 %ret + } -define void @fpext_f64_to_fp80(double* %val, 
x86_fp80* %ret) nounwind strictfp { +define x86_fp80 @fpext_f64_to_fp80(double %a) nounwind strictfp { ; X86-LABEL: fpext_f64_to_fp80: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: fldl (%ecx) -; X86-NEXT: fstpt (%eax) +; X86-NEXT: fldl {{[0-9]+}}(%esp) ; X86-NEXT: retl ; ; X64-LABEL: fpext_f64_to_fp80: ; X64: # %bb.0: -; X64-NEXT: fldl (%rdi) -; X64-NEXT: fstpt (%rsi) +; X64-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp) +; X64-NEXT: fldl -{{[0-9]+}}(%rsp) ; X64-NEXT: retq - %1 = load double, double* %val, align 8 - %res = call x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f64(double %1, + %ret = call x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f64(double %a, metadata !"fpexcept.strict") #0 - store x86_fp80 %res, x86_fp80* %ret, align 16 - ret void + ret x86_fp80 %ret + } -define void @fptrunc_fp80_to_f32(x86_fp80* %val, float *%ret) nounwind strictfp { +define float @fptrunc_fp80_to_f32(x86_fp80 %a) nounwind strictfp { ; X86-LABEL: fptrunc_fp80_to_f32: ; X86: # %bb.0: ; X86-NEXT: pushl %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: fldt (%ecx) +; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fstps (%esp) ; X86-NEXT: flds (%esp) -; X86-NEXT: fstps (%eax) ; X86-NEXT: popl %eax ; X86-NEXT: retl ; ; X64-LABEL: fptrunc_fp80_to_f32: ; X64: # %bb.0: -; X64-NEXT: fldt (%rdi) +; X64-NEXT: fldt {{[0-9]+}}(%rsp) ; X64-NEXT: fstps -{{[0-9]+}}(%rsp) -; X64-NEXT: flds -{{[0-9]+}}(%rsp) -; X64-NEXT: fstps (%rsi) +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-NEXT: retq - %1 = load x86_fp80, x86_fp80* %val, align 16 - %res = call float @llvm.experimental.constrained.fptrunc.x86_fp80.f32(x86_fp80 %1, + %ret = call float @llvm.experimental.constrained.fptrunc.x86_fp80.f32(x86_fp80 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 - store float %res, float* %ret, align 4 - ret void + ret float %ret + } -define void 
@fptrunc_fp80_to_f64(x86_fp80* %val, double* %ret) nounwind strictfp { +define double @fptrunc_fp80_to_f64(x86_fp80 %a) nounwind strictfp { ; X86-LABEL: fptrunc_fp80_to_f64: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp -; X86-NEXT: movl 12(%ebp), %eax -; X86-NEXT: movl 8(%ebp), %ecx -; X86-NEXT: fldt (%ecx) +; X86-NEXT: fldt 8(%ebp) ; X86-NEXT: fstpl (%esp) ; X86-NEXT: fldl (%esp) -; X86-NEXT: fstpl (%eax) ; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp ; X86-NEXT: retl ; ; X64-LABEL: fptrunc_fp80_to_f64: ; X64: # %bb.0: -; X64-NEXT: fldt (%rdi) +; X64-NEXT: fldt {{[0-9]+}}(%rsp) ; X64-NEXT: fstpl -{{[0-9]+}}(%rsp) -; X64-NEXT: fldl -{{[0-9]+}}(%rsp) -; X64-NEXT: fstpl (%rsi) +; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X64-NEXT: retq - %1 = load x86_fp80, x86_fp80* %val, align 16 - %res = call double @llvm.experimental.constrained.fptrunc.x86_fp80.f64(x86_fp80 %1, + %ret = call double @llvm.experimental.constrained.fptrunc.x86_fp80.f64(x86_fp80 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 - store double %res, double* %ret, align 8 - ret void + ret double %ret + } -define void @fsqrt_fp80(x86_fp80* %a) nounwind strictfp { +define x86_fp80 @fsqrt_fp80(x86_fp80 %a) nounwind strictfp { ; X86-LABEL: fsqrt_fp80: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: fldt (%eax) +; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fsqrt -; X86-NEXT: fstpt (%eax) ; X86-NEXT: retl ; ; X64-LABEL: fsqrt_fp80: ; X64: # %bb.0: -; X64-NEXT: fldt (%rdi) +; X64-NEXT: fldt {{[0-9]+}}(%rsp) ; X64-NEXT: fsqrt -; X64-NEXT: fstpt (%rdi) ; X64-NEXT: retq - %1 = load x86_fp80, x86_fp80* %a, align 16 - %res = call x86_fp80 @llvm.experimental.constrained.sqrt.x86_fp80(x86_fp80 %1, + %ret = call x86_fp80 @llvm.experimental.constrained.sqrt.x86_fp80(x86_fp80 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 - store x86_fp80 %res, x86_fp80* %a, align 16 - ret void + ret 
x86_fp80 %ret + } attributes #0 = { strictfp } From a71c1e2a576a6b0f85cab2bef12446b0e3967853 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 25 Nov 2019 13:24:18 -0800 Subject: [PATCH 007/591] [ELF] Support input section description .rel[a].dyn in /DISCARD/ Reviewed By: ruiu Differential Revision: https://reviews.llvm.org/D70695 --- lld/ELF/LinkerScript.cpp | 2 +- .../ELF/linkerscript/discard-section-err.s | 19 +++++++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index cebbd89168be5..a1561d2d41591 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -442,7 +442,7 @@ LinkerScript::computeInputSections(const InputSectionDescription *cmd) { } void LinkerScript::discard(InputSectionBase *s) { - if (s == in.shStrTab || s == mainPart->relaDyn || s == mainPart->relrDyn) + if (s == in.shStrTab || s == mainPart->relrDyn) error("discarding " + s->name + " section is not allowed"); // You can discard .hash and .gnu.hash sections by linker scripts. 
Since diff --git a/lld/test/ELF/linkerscript/discard-section-err.s b/lld/test/ELF/linkerscript/discard-section-err.s index bb77dbb087da3..dd3c666e115b1 100644 --- a/lld/test/ELF/linkerscript/discard-section-err.s +++ b/lld/test/ELF/linkerscript/discard-section-err.s @@ -20,8 +20,19 @@ # RUN: ld.lld -pie -o %t --script %t.script %t.o # RUN: echo "SECTIONS { /DISCARD/ : { *(.rela.dyn) } }" > %t.script -# RUN: not ld.lld -pie -o %t --script %t.script %t.o 2>&1 | \ -# RUN: FileCheck -check-prefix=RELADYN %s -# RELADYN: discarding .rela.dyn section is not allowed +# RUN: ld.lld -pie -o %t %t.o +# RUN: llvm-readobj -S %t | FileCheck --check-prefix=RELADYN %s +# RELADYN: Name: .rela.dyn +# RUN: ld.lld -pie -o %t --script %t.script %t.o +# RUN: llvm-readobj -S %t | FileCheck /dev/null --implicit-check-not='Name: .rela.dyn' + +# RUN: echo "SECTIONS { /DISCARD/ : { *(.relr.dyn) } }" > %t.script +# RUN: not ld.lld -pie --pack-dyn-relocs=relr -o %t --script %t.script %t.o 2>&1 | \ +# RUN: FileCheck -check-prefix=RELRDYN %s +# RELRDYN: discarding .relr.dyn section is not allowed -.comm foo,4,4 +.data +.align 8 +foo: +## Emits an R_X86_64_RELATIVE in -pie mode. +.quad foo From 1b20908334847f4dc7b283f0493f4c59f1c62858 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 25 Nov 2019 23:07:43 -0800 Subject: [PATCH 008/591] [X86] Return Op instead of SDValue() for lowering flags_read/write intrinsics Returning SDValue() means we didn't handle it and the common code should try to expand it. But its a target intrinsic so expanding won't do anything and just leave the node alone. But it will print confusing debug messages. By returning Op we tell the common code that the node is legal and shouldn't receive any further processing. 
--- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 33f50e518bbe0..c3861adf09122 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -24105,7 +24105,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, MFI.setHasCopyImplyingStackAdjustment(true); // Don't do anything here, we will expand these intrinsics out later // during FinalizeISel in EmitInstrWithCustomInserter. - return SDValue(); + return Op; } case Intrinsic::x86_lwpins32: case Intrinsic::x86_lwpins64: From 78a750276f442726193b007a0010432765abe9d4 Mon Sep 17 00:00:00 2001 From: Hideto Ueno Date: Tue, 26 Nov 2019 07:51:59 +0000 Subject: [PATCH 009/591] [Attributor] Track a GEP Instruction in align deduction Summary: This patch enables us to track GEP instruction in align deduction. If a pointer `B` is defined as `A+Offset` and known to have alignment `C`, there exists some integer Q such that ``` A + Offset = C * Q = B ``` So we can say that the maximum power of two which is a divisor of gcd(Offset, C) is an alignment. 
Reviewers: jdoerfert, sstefan1 Reviewed By: jdoerfert Subscribers: lebedev.ri, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70392 --- llvm/lib/Transforms/IPO/Attributor.cpp | 49 +++++++++++---- llvm/test/Transforms/FunctionAttrs/align.ll | 61 +++++++++++++++++++ .../Transforms/FunctionAttrs/arg_nocapture.ll | 3 +- .../Transforms/FunctionAttrs/nocapture.ll | 2 +- .../InferFunctionAttrs/dereferenceable.ll | 6 +- 5 files changed, 102 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 44d73b5ad5c04..366c347daeb1e 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -3119,6 +3119,20 @@ static unsigned int getKnownAlignForUse(Attributor &A, AbstractAttribute &QueryingAA, Value &AssociatedValue, const Use *U, const Instruction *I, bool &TrackUse) { + // We need to follow common pointer manipulation uses to the accesses they + // feed into. + if (isa(I)) { + TrackUse = true; + return 0; + } + if (auto *GEP = dyn_cast(I)) { + if (GEP->hasAllConstantIndices()) { + TrackUse = true; + return 0; + } + } + + unsigned Alignment = 0; if (ImmutableCallSite ICS = ImmutableCallSite(I)) { if (ICS.isBundleOperand(U) || ICS.isCallee(U)) return 0; @@ -3129,23 +3143,34 @@ static unsigned int getKnownAlignForUse(Attributor &A, // dependences here. auto &AlignAA = A.getAAFor(QueryingAA, IRP, /* TrackDependence */ false); - return AlignAA.getKnownAlign(); - } - - // We need to follow common pointer manipulation uses to the accesses they - // feed into. 
- // TODO: Consider gep instruction - if (isa(I)) { - TrackUse = true; - return 0; + Alignment = AlignAA.getKnownAlign(); } + const Value *UseV = U->get(); if (auto *SI = dyn_cast(I)) - return SI->getAlignment(); + Alignment = SI->getAlignment(); else if (auto *LI = dyn_cast(I)) - return LI->getAlignment(); + Alignment = LI->getAlignment(); - return 0; + if (Alignment <= 1) + return 0; + + auto &DL = A.getDataLayout(); + int64_t Offset; + + if (const Value *Base = GetPointerBaseWithConstantOffset(UseV, Offset, DL)) { + if (Base == &AssociatedValue) { + // BasePointerAddr + Offset = Alignment * Q for some integer Q. + // So we can say that the maximum power of two which is a divisor of + // gcd(Offset, Alignment) is an alignment. + + uint32_t gcd = + greatestCommonDivisor(uint32_t(abs((int32_t)Offset)), Alignment); + Alignment = llvm::PowerOf2Floor(gcd); + } + } + + return Alignment; } struct AAAlignImpl : AAAlign { AAAlignImpl(const IRPosition &IRP) : AAAlign(IRP) {} diff --git a/llvm/test/Transforms/FunctionAttrs/align.ll b/llvm/test/Transforms/FunctionAttrs/align.ll index da7bd1b5cc9a8..b8817a44fce79 100644 --- a/llvm/test/Transforms/FunctionAttrs/align.ll +++ b/llvm/test/Transforms/FunctionAttrs/align.ll @@ -337,5 +337,66 @@ define i64 @test11(i32* %p) { %ret = load i64, i64* %p-cast, align 8 ret i64 %ret } + +; TEST 12 +; Test for deduction using must-be-executed-context and GEP instruction + +; FXIME: %p should have nonnull +; ATTRIBUTOR: define i64 @test12-1(i32* nocapture nofree readonly align 16 %p) +define i64 @test12-1(i32* align 4 %p) { + %p-cast = bitcast i32* %p to i64* + %arrayidx0 = getelementptr i64, i64* %p-cast, i64 1 + %arrayidx1 = getelementptr i64, i64* %arrayidx0, i64 3 + %ret = load i64, i64* %arrayidx1, align 16 + ret i64 %ret +} + +; FXIME: %p should have nonnull +; ATTRIBUTOR: define i64 @test12-2(i32* nocapture nofree readonly align 16 %p) +define i64 @test12-2(i32* align 4 %p) { + %p-cast = bitcast i32* %p to i64* + %arrayidx0 = 
getelementptr i64, i64* %p-cast, i64 0 + %ret = load i64, i64* %arrayidx0, align 16 + ret i64 %ret +} + +; FXIME: %p should have nonnull +; ATTRIBUTOR: define void @test12-3(i32* nocapture nofree writeonly align 16 %p) +define void @test12-3(i32* align 4 %p) { + %p-cast = bitcast i32* %p to i64* + %arrayidx0 = getelementptr i64, i64* %p-cast, i64 1 + %arrayidx1 = getelementptr i64, i64* %arrayidx0, i64 3 + store i64 0, i64* %arrayidx1, align 16 + ret void +} + +; FXIME: %p should have nonnull +; ATTRIBUTOR: define void @test12-4(i32* nocapture nofree writeonly align 16 %p) +define void @test12-4(i32* align 4 %p) { + %p-cast = bitcast i32* %p to i64* + %arrayidx0 = getelementptr i64, i64* %p-cast, i64 0 + store i64 0, i64* %arrayidx0, align 16 + ret void +} + +declare void @use(i64*) willreturn nounwind + +; ATTRIBUTOR: define void @test12-5(i32* align 16 %p) +define void @test12-5(i32* align 4 %p) { + %p-cast = bitcast i32* %p to i64* + %arrayidx0 = getelementptr i64, i64* %p-cast, i64 1 + %arrayidx1 = getelementptr i64, i64* %arrayidx0, i64 3 + tail call void @use(i64* align 16 %arrayidx1) + ret void +} + +; ATTRIBUTOR: define void @test12-6(i32* align 16 %p) +define void @test12-6(i32* align 4 %p) { + %p-cast = bitcast i32* %p to i64* + %arrayidx0 = getelementptr i64, i64* %p-cast, i64 0 + tail call void @use(i64* align 16 %arrayidx0) + ret void +} + attributes #0 = { nounwind uwtable noinline } attributes #1 = { uwtable noinline } diff --git a/llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll b/llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll index fa4d984e931b0..79075268ed410 100644 --- a/llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll +++ b/llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll @@ -260,8 +260,7 @@ entry: ; } ; ; There should *not* be a no-capture attribute on %a -; FIXME: %a should have align 8 -; CHECK: define nonnull align 8 dereferenceable(8) i64* @not_captured_but_returned_1(i64* nofree nonnull writeonly dereferenceable(16) 
"no-capture-maybe-returned" %a) +; CHECK: define nonnull align 8 dereferenceable(8) i64* @not_captured_but_returned_1(i64* nofree nonnull writeonly align 8 dereferenceable(16) "no-capture-maybe-returned" %a) define i64* @not_captured_but_returned_1(i64* %a) #0 { entry: %add.ptr = getelementptr inbounds i64, i64* %a, i64 1 diff --git a/llvm/test/Transforms/FunctionAttrs/nocapture.ll b/llvm/test/Transforms/FunctionAttrs/nocapture.ll index c54559883ae3e..a78af1041672f 100644 --- a/llvm/test/Transforms/FunctionAttrs/nocapture.ll +++ b/llvm/test/Transforms/FunctionAttrs/nocapture.ll @@ -259,7 +259,7 @@ define void @test_atomicrmw(i32* %p) { } ; FNATTR: define void @test_volatile(i32* %x) -; ATTRIBUTOR: define void @test_volatile(i32* nofree %x) +; ATTRIBUTOR: define void @test_volatile(i32* nofree align 4 %x) define void @test_volatile(i32* %x) { entry: %gep = getelementptr i32, i32* %x, i64 1 diff --git a/llvm/test/Transforms/InferFunctionAttrs/dereferenceable.ll b/llvm/test/Transforms/InferFunctionAttrs/dereferenceable.ll index a824c7995234d..322d85aba9c00 100644 --- a/llvm/test/Transforms/InferFunctionAttrs/dereferenceable.ll +++ b/llvm/test/Transforms/InferFunctionAttrs/dereferenceable.ll @@ -31,8 +31,7 @@ define <4 x double> @PR21780(double* %ptr) { define double @PR21780_only_access3_with_inbounds(double* %ptr) { ; CHECK-LABEL: @PR21780_only_access3_with_inbounds(double* %ptr) -; FIXME: %ptr should have align 8 -; ATTRIBUTOR-LABEL: @PR21780_only_access3_with_inbounds(double* nocapture nofree nonnull readonly dereferenceable(32) %ptr) +; ATTRIBUTOR-LABEL: @PR21780_only_access3_with_inbounds(double* nocapture nofree nonnull readonly align 8 dereferenceable(32) %ptr) %arrayidx3 = getelementptr inbounds double, double* %ptr, i64 3 %t3 = load double, double* %arrayidx3, align 8 @@ -41,8 +40,7 @@ define double @PR21780_only_access3_with_inbounds(double* %ptr) { define double @PR21780_only_access3_without_inbounds(double* %ptr) { ; CHECK-LABEL: 
@PR21780_only_access3_without_inbounds(double* %ptr) -; FIXME: %ptr should have align 8 -; ATTRIBUTOR-LABEL: @PR21780_only_access3_without_inbounds(double* nocapture nofree readonly %ptr) +; ATTRIBUTOR-LABEL: @PR21780_only_access3_without_inbounds(double* nocapture nofree readonly align 8 %ptr) %arrayidx3 = getelementptr double, double* %ptr, i64 3 %t3 = load double, double* %arrayidx3, align 8 ret double %t3 From 19edd675c6321bc0447f459f427fa67ad46f5e2e Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Mon, 25 Nov 2019 12:39:03 +0300 Subject: [PATCH 010/591] [LLD][ELF] - Make compression level be dependent on -On. Currently LLD always use zlib compression level 6. This patch changes it to use 1 for -O0, -O1 and 6 for -O2. It fixes https://bugs.llvm.org/show_bug.cgi?id=44089. There was also a thread in llvm-dev on this topic: https://lists.llvm.org/pipermail/llvm-dev/2018-August/125020.html Here is a table with results of building clang mentioned there: ``` Level Time Size 0 0m17.128s 2045081496 Z_NO_COMPRESSION 1 0m31.471s 922618584 Z_BEST_SPEED 2 0m32.659s 903642376 3 0m36.749s 890805856 4 0m41.532s 876697184 5 0m48.383s 862778576 6 1m3.176s 855251640 Z_DEFAULT_COMPRESSION 7 1m15.335s 853755920 8 2m0.561s 852497560 9 2m33.972s 852397408 Z_BEST_COMPRESSION ``` It shows that it is probably not reasonable to use values greater than 6. Differential revision: https://reviews.llvm.org/D70658 --- lld/ELF/OutputSections.cpp | 7 ++++- lld/test/ELF/compressed-debug-level.test | 38 ++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 lld/test/ELF/compressed-debug-level.test diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp index cc051dba0e0aa..8d328626b85f9 100644 --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -272,7 +272,12 @@ template void OutputSection::maybeCompress() { // Write section contents to a temporary buffer and compress it. 
std::vector buf(size); writeTo(buf.data()); - if (Error e = zlib::compress(toStringRef(buf), compressedData)) + // We chose 1 as the default compression level because it is the fastest. If + // -O2 is given, we use level 6 to compress debug info more by ~15%. We found + // that level 7 to 9 doesn't make much difference (~1% more compression) while + // they take significant amount of time (~2x), so level 6 seems enough. + if (Error e = zlib::compress(toStringRef(buf), compressedData, + config->optimize >= 2 ? 6 : 1)) fatal("compress failed: " + llvm::toString(std::move(e))); // Update section headers. diff --git a/lld/test/ELF/compressed-debug-level.test b/lld/test/ELF/compressed-debug-level.test new file mode 100644 index 0000000000000..38d7d9016d127 --- /dev/null +++ b/lld/test/ELF/compressed-debug-level.test @@ -0,0 +1,38 @@ +# REQUIRES: x86, zlib + +# RUN: yaml2obj %s -o %t.o + +# RUN: ld.lld %t.o -o %t.default --compress-debug-sections=zlib +# RUN: llvm-readelf --sections %t.default | FileCheck -check-prefixes=HEADER,LEVEL1 %s + +# RUN: ld.lld -O0 %t.o -o %t.O0 --compress-debug-sections=zlib +# RUN: llvm-readelf --sections %t.O0 | FileCheck -check-prefixes=HEADER,LEVEL1 %s +# RUN: cmp %t.default %t.O0 + +# RUN: ld.lld -O1 %t.o -o %t.O1 --compress-debug-sections=zlib +# RUN: llvm-readelf --sections %t.O1 | FileCheck -check-prefixes=HEADER,LEVEL1 %s +# RUN: cmp %t.default %t.O1 + +# RUN: ld.lld -O2 %t.o -o %t.O2 --compress-debug-sections=zlib +# RUN: llvm-readelf --sections %t.O2 | FileCheck -check-prefixes=HEADER,LEVEL6 %s + +## LLD uses zlib compression of level 1 when -O0, -O1 and level 6 when -O2. +## Here we check how -O flag affects the size of compressed sections produced. + +# HEADER: [Nr] Name Type Address Off Size +# LEVEL1: [ 1] .debug_info PROGBITS 00000000 000094 00001c +# LEVEL6: [ 1] .debug_info PROGBITS 00000000 000094 00001b + +## A little arbitrary debug section which has a different size after +## applying compression of level 1 and 6. 
+ +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_386 +Sections: + - Name: .debug_info + Type: SHT_PROGBITS + Content: '01010201020201020102' From 852bafae2bb4d875e8d206168a57667f59c0f9a6 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Wed, 23 Oct 2019 14:40:20 +0200 Subject: [PATCH 011/591] [clangd] Implement cross-file rename. Summary: This is the initial version. The cross-file rename is purely based on the index. It is hidden under a command-line flag, and only available for a small set of symbols. Reviewers: ilya-biryukov, sammccall Subscribers: merge_guards_bot, MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D69263 --- clang-tools-extra/clangd/ClangdLSPServer.cpp | 36 +- clang-tools-extra/clangd/ClangdServer.cpp | 71 ++-- clang-tools-extra/clangd/ClangdServer.h | 8 +- clang-tools-extra/clangd/SourceCode.h | 3 + clang-tools-extra/clangd/TUScheduler.cpp | 7 + clang-tools-extra/clangd/TUScheduler.h | 3 + clang-tools-extra/clangd/refactor/Rename.cpp | 334 ++++++++++++++---- clang-tools-extra/clangd/refactor/Rename.h | 35 +- clang-tools-extra/clangd/refactor/Tweak.h | 4 +- clang-tools-extra/clangd/tool/ClangdMain.cpp | 11 + .../clangd/unittests/RenameTests.cpp | 145 +++++++- .../clangd/unittests/SyncAPI.cpp | 7 +- clang-tools-extra/clangd/unittests/SyncAPI.h | 4 +- 13 files changed, 523 insertions(+), 145 deletions(-) diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index 4fe8158180749..57ed97f7a7825 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -103,13 +103,13 @@ std::vector> buildHighlightScopeLookupTable() { return LookupTable; } -// Makes sure edits in \p E are applicable to latest file contents reported by +// Makes sure edits in \p FE are applicable to latest file contents reported by // editor. 
If not generates an error message containing information about files // that needs to be saved. -llvm::Error validateEdits(const DraftStore &DraftMgr, const Tweak::Effect &E) { +llvm::Error validateEdits(const DraftStore &DraftMgr, const FileEdits &FE) { size_t InvalidFileCount = 0; llvm::StringRef LastInvalidFile; - for (const auto &It : E.ApplyEdits) { + for (const auto &It : FE) { if (auto Draft = DraftMgr.getDraft(It.first())) { // If the file is open in user's editor, make sure the version we // saw and current version are compatible as this is the text that @@ -704,7 +704,7 @@ void ClangdLSPServer::onCommand(const ExecuteCommandParams &Params, if (R->ApplyEdits.empty()) return Reply("Tweak applied."); - if (auto Err = validateEdits(DraftMgr, *R)) + if (auto Err = validateEdits(DraftMgr, R->ApplyEdits)) return Reply(std::move(Err)); WorkspaceEdit WE; @@ -758,17 +758,23 @@ void ClangdLSPServer::onRename(const RenameParams &Params, if (!Code) return Reply(llvm::make_error( "onRename called for non-added file", ErrorCode::InvalidParams)); - - Server->rename(File, Params.position, Params.newName, /*WantFormat=*/true, - [File, Code, Params, Reply = std::move(Reply)]( - llvm::Expected> Edits) mutable { - if (!Edits) - return Reply(Edits.takeError()); - - WorkspaceEdit WE; - WE.changes = {{Params.textDocument.uri.uri(), *Edits}}; - Reply(WE); - }); + Server->rename( + File, Params.position, Params.newName, + /*WantFormat=*/true, + [File, Params, Reply = std::move(Reply), + this](llvm::Expected Edits) mutable { + if (!Edits) + return Reply(Edits.takeError()); + if (auto Err = validateEdits(DraftMgr, *Edits)) + return Reply(std::move(Err)); + WorkspaceEdit Result; + Result.changes.emplace(); + for (const auto &Rep : *Edits) { + (*Result.changes)[URI::createFile(Rep.first()).toString()] = + Rep.second.asTextEdits(); + } + Reply(Result); + }); } void ClangdLSPServer::onDocumentDidClose( diff --git a/clang-tools-extra/clangd/ClangdServer.cpp 
b/clang-tools-extra/clangd/ClangdServer.cpp index 5a9833d78b48e..6c5fabdce5c34 100644 --- a/clang-tools-extra/clangd/ClangdServer.cpp +++ b/clang-tools-extra/clangd/ClangdServer.cpp @@ -119,7 +119,8 @@ ClangdServer::ClangdServer(const GlobalCompilationDatabase &CDB, : nullptr), GetClangTidyOptions(Opts.GetClangTidyOptions), SuggestMissingIncludes(Opts.SuggestMissingIncludes), - TweakFilter(Opts.TweakFilter), WorkspaceRoot(Opts.WorkspaceRoot), + CrossFileRename(Opts.CrossFileRename), TweakFilter(Opts.TweakFilter), + WorkspaceRoot(Opts.WorkspaceRoot), // Pass a callback into `WorkScheduler` to extract symbols from a newly // parsed file and rebuild the file index synchronously each time an AST // is parsed. @@ -308,54 +309,68 @@ void ClangdServer::prepareRename(PathRef File, Position Pos, if (!InpAST) return CB(InpAST.takeError()); auto &AST = InpAST->AST; - // Performing the rename isn't substantially more expensive than doing an - // AST-based check, so we just rename and throw away the results. We may - // have to revisit this when we support cross-file rename. - auto Changes = renameWithinFile(AST, File, Pos, "dummy", Index); + const auto &SM = AST.getSourceManager(); + SourceLocation Loc = + SM.getMacroArgExpandedLocation(getBeginningOfIdentifier( + Pos, AST.getSourceManager(), AST.getASTContext().getLangOpts())); + auto Range = getTokenRange(SM, AST.getASTContext().getLangOpts(), Loc); + if (!Range) + return CB(llvm::None); // "rename" is not valid at the position. + + if (CrossFileRename) + // FIXME: we now assume cross-file rename always succeeds, revisit this. + return CB(*Range); + + // Performing the local rename isn't substantially more expensive than + // doing an AST-based check, so we just rename and throw away the results. 
+ auto Changes = clangd::rename({Pos, "dummy", AST, File, Index, + /*AllowCrossFile=*/false, + /*GetDirtyBuffer=*/nullptr}); if (!Changes) { // LSP says to return null on failure, but that will result in a generic // failure message. If we send an LSP error response, clients can surface // the message to users (VSCode does). return CB(Changes.takeError()); } - SourceLocation Loc = getBeginningOfIdentifier( - Pos, AST.getSourceManager(), AST.getASTContext().getLangOpts()); - if (auto Range = getTokenRange(AST.getSourceManager(), - AST.getASTContext().getLangOpts(), Loc)) - return CB(*Range); - // Return null if the "rename" is not valid at the position. - CB(llvm::None); + return CB(*Range); }; WorkScheduler.runWithAST("PrepareRename", File, std::move(Action)); } void ClangdServer::rename(PathRef File, Position Pos, llvm::StringRef NewName, - bool WantFormat, Callback> CB) { + bool WantFormat, Callback CB) { + // A snapshot of all file dirty buffers. + llvm::StringMap Snapshot = WorkScheduler.getAllFileContents(); auto Action = [File = File.str(), NewName = NewName.str(), Pos, WantFormat, - CB = std::move(CB), + CB = std::move(CB), Snapshot = std::move(Snapshot), this](llvm::Expected InpAST) mutable { if (!InpAST) return CB(InpAST.takeError()); - auto Changes = renameWithinFile(InpAST->AST, File, Pos, NewName, Index); - if (!Changes) - return CB(Changes.takeError()); + auto GetDirtyBuffer = + [&Snapshot](PathRef AbsPath) -> llvm::Optional { + auto It = Snapshot.find(AbsPath); + if (It == Snapshot.end()) + return llvm::None; + return It->second; + }; + auto Edits = clangd::rename({Pos, NewName, InpAST->AST, File, Index, + CrossFileRename, GetDirtyBuffer}); + if (!Edits) + return CB(Edits.takeError()); if (WantFormat) { auto Style = getFormatStyleForFile(File, InpAST->Inputs.Contents, InpAST->Inputs.FS.get()); - if (auto Formatted = - cleanupAndFormat(InpAST->Inputs.Contents, *Changes, Style)) - *Changes = std::move(*Formatted); - else - elog("Failed to format 
replacements: {0}", Formatted.takeError()); - } + llvm::Error Err = llvm::Error::success(); + for (auto &E : *Edits) + Err = + llvm::joinErrors(reformatEdit(E.getValue(), Style), std::move(Err)); - std::vector Edits; - for (const auto &Rep : *Changes) - Edits.push_back(replacementToEdit(InpAST->Inputs.Contents, Rep)); - return CB(std::move(Edits)); + if (Err) + return CB(std::move(Err)); + } + return CB(std::move(*Edits)); }; - WorkScheduler.runWithAST("Rename", File, std::move(Action)); } diff --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h index cd0b91c08f084..499340808765b 100644 --- a/clang-tools-extra/clangd/ClangdServer.h +++ b/clang-tools-extra/clangd/ClangdServer.h @@ -24,6 +24,7 @@ #include "index/Background.h" #include "index/FileIndex.h" #include "index/Index.h" +#include "refactor/Rename.h" #include "refactor/Tweak.h" #include "clang/Tooling/CompilationDatabase.h" #include "clang/Tooling/Core/Replacement.h" @@ -133,6 +134,9 @@ class ClangdServer { /// Enable semantic highlighting features. bool SemanticHighlighting = false; + /// Enable cross-file rename feature. + bool CrossFileRename = false; + /// Returns true if the tweak should be enabled. std::function TweakFilter = [](const Tweak &T) { return !T.hidden(); // only enable non-hidden tweaks. @@ -252,7 +256,7 @@ class ClangdServer { /// embedders could use this method to get all occurrences of the symbol (e.g. /// highlighting them in prepare stage). void rename(PathRef File, Position Pos, llvm::StringRef NewName, - bool WantFormat, Callback> CB); + bool WantFormat, Callback CB); struct TweakRef { std::string ID; /// ID to pass for applyTweak. @@ -327,6 +331,8 @@ class ClangdServer { // can be caused by missing includes (e.g. member access in incomplete type). 
bool SuggestMissingIncludes = false; + bool CrossFileRename = false; + std::function TweakFilter; // GUARDED_BY(CachedCompletionFuzzyFindRequestMutex) diff --git a/clang-tools-extra/clangd/SourceCode.h b/clang-tools-extra/clangd/SourceCode.h index 3b8aacef9bf17..f75be998dc2d4 100644 --- a/clang-tools-extra/clangd/SourceCode.h +++ b/clang-tools-extra/clangd/SourceCode.h @@ -223,6 +223,9 @@ struct Edit { /// Checks whether the Replacements are applicable to given Code. bool canApplyTo(llvm::StringRef Code) const; }; +/// A mapping from absolute file path (the one used for accessing the underlying +/// VFS) to edits. +using FileEdits = llvm::StringMap; /// Formats the edits and code around it according to Style. Changes /// Replacements to formatted ones if succeeds. diff --git a/clang-tools-extra/clangd/TUScheduler.cpp b/clang-tools-extra/clangd/TUScheduler.cpp index 6436e7a50c615..d740c38736957 100644 --- a/clang-tools-extra/clangd/TUScheduler.cpp +++ b/clang-tools-extra/clangd/TUScheduler.cpp @@ -916,6 +916,13 @@ llvm::StringRef TUScheduler::getContents(PathRef File) const { return It->second->Contents; } +llvm::StringMap TUScheduler::getAllFileContents() const { + llvm::StringMap Results; + for (auto &It : Files) + Results.try_emplace(It.getKey(), It.getValue()->Contents); + return Results; +} + void TUScheduler::run(llvm::StringRef Name, llvm::unique_function Action) { if (!PreambleTasks) diff --git a/clang-tools-extra/clangd/TUScheduler.h b/clang-tools-extra/clangd/TUScheduler.h index ff2d4d485047f..de3b895499831 100644 --- a/clang-tools-extra/clangd/TUScheduler.h +++ b/clang-tools-extra/clangd/TUScheduler.h @@ -180,6 +180,9 @@ class TUScheduler { /// The returned StringRef may be invalidated by any write to TUScheduler. llvm::StringRef getContents(PathRef File) const; + /// Returns a snapshot of all file buffer contents, per last update(). + llvm::StringMap getAllFileContents() const; + /// Schedule an async task with no dependencies. 
void run(llvm::StringRef Name, llvm::unique_function Action); diff --git a/clang-tools-extra/clangd/refactor/Rename.cpp b/clang-tools-extra/clangd/refactor/Rename.cpp index fb83083384f95..d4b186b4ca909 100644 --- a/clang-tools-extra/clangd/refactor/Rename.cpp +++ b/clang-tools-extra/clangd/refactor/Rename.cpp @@ -18,6 +18,8 @@ #include "clang/AST/DeclTemplate.h" #include "clang/Basic/SourceLocation.h" #include "clang/Tooling/Refactoring/Rename/USRFindingAction.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Error.h" namespace clang { namespace clangd { @@ -55,8 +57,7 @@ llvm::Optional getOtherRefFile(const Decl &D, StringRef MainFile, // tradeoff. We expect the number of symbol references in the current file // is smaller than the limit. Req.Limit = 100; - if (auto ID = getSymbolID(&D)) - Req.IDs.insert(*ID); + Req.IDs.insert(*getSymbolID(&D)); llvm::Optional OtherFile; Index.refs(Req, [&](const Ref &R) { if (OtherFile) @@ -101,71 +102,95 @@ enum ReasonToReject { NoSymbolFound, NoIndexProvided, NonIndexable, - UsedOutsideFile, + UsedOutsideFile, // for within-file rename only. UnsupportedSymbol, AmbiguousSymbol, }; -// Check the symbol Decl is renameable (per the index) within the file. -llvm::Optional renamableWithinFile(const Decl &RenameDecl, - StringRef MainFile, - const SymbolIndex *Index) { +llvm::Optional renameable(const Decl &RenameDecl, + StringRef MainFilePath, + const SymbolIndex *Index, + bool CrossFile) { + // Filter out symbols that are unsupported in both rename modes. 
if (llvm::isa(&RenameDecl)) return ReasonToReject::UnsupportedSymbol; if (const auto *FD = llvm::dyn_cast(&RenameDecl)) { if (FD->isOverloadedOperator()) return ReasonToReject::UnsupportedSymbol; } - auto &ASTCtx = RenameDecl.getASTContext(); - const auto &SM = ASTCtx.getSourceManager(); - bool MainFileIsHeader = isHeaderFile(MainFile, ASTCtx.getLangOpts()); - bool DeclaredInMainFile = isInsideMainFile(RenameDecl.getBeginLoc(), SM); - - if (!DeclaredInMainFile) - // We are sure the symbol is used externally, bail out early. - return UsedOutsideFile; - - // If the symbol is declared in the main file (which is not a header), we - // rename it. - if (!MainFileIsHeader) - return None; - - // Below are cases where the symbol is declared in the header. - // If the symbol is function-local, we rename it. + // function-local symbols is safe to rename. if (RenameDecl.getParentFunctionOrMethod()) return None; + bool IsIndexable = + isa(RenameDecl) && + SymbolCollector::shouldCollectSymbol( + cast(RenameDecl), RenameDecl.getASTContext(), + SymbolCollector::Options(), CrossFile); + if (!IsIndexable) // If the symbol is not indexable, we disallow rename. + return ReasonToReject::NonIndexable; + + if (!CrossFile) { + auto &ASTCtx = RenameDecl.getASTContext(); + const auto &SM = ASTCtx.getSourceManager(); + bool MainFileIsHeader = isHeaderFile(MainFilePath, ASTCtx.getLangOpts()); + bool DeclaredInMainFile = isInsideMainFile(RenameDecl.getBeginLoc(), SM); + + if (!DeclaredInMainFile) + // We are sure the symbol is used externally, bail out early. + return ReasonToReject::UsedOutsideFile; + + // If the symbol is declared in the main file (which is not a header), we + // rename it. + if (!MainFileIsHeader) + return None; + + if (!Index) + return ReasonToReject::NoIndexProvided; + + auto OtherFile = getOtherRefFile(RenameDecl, MainFilePath, *Index); + // If the symbol is indexable and has no refs from other files in the index, + // we rename it. 
+ if (!OtherFile) + return None; + // If the symbol is indexable and has refs from other files in the index, + // we disallow rename. + return ReasonToReject::UsedOutsideFile; + } + + assert(CrossFile); if (!Index) return ReasonToReject::NoIndexProvided; - bool IsIndexable = isa(RenameDecl) && - SymbolCollector::shouldCollectSymbol( - cast(RenameDecl), ASTCtx, {}, false); - // If the symbol is not indexable, we disallow rename. - if (!IsIndexable) - return ReasonToReject::NonIndexable; - auto OtherFile = getOtherRefFile(RenameDecl, MainFile, *Index); - // If the symbol is indexable and has no refs from other files in the index, - // we rename it. - if (!OtherFile) - return None; - // If the symbol is indexable and has refs from other files in the index, - // we disallow rename. - return ReasonToReject::UsedOutsideFile; + // Blacklist symbols that are not supported yet in cross-file mode due to the + // limitations of our index. + // FIXME: renaming templates requries to rename all related specializations, + // our index doesn't have this information. + if (RenameDecl.getDescribedTemplate()) + return ReasonToReject::UnsupportedSymbol; + + // FIXME: renaming virtual methods requires to rename all overridens in + // subclasses, our index doesn't have this information. + // Note: within-file rename does support this through the AST. 
+ if (const auto *S = llvm::dyn_cast(&RenameDecl)) { + if (S->isVirtual()) + return ReasonToReject::UnsupportedSymbol; + } + return None; } llvm::Error makeError(ReasonToReject Reason) { auto Message = [](ReasonToReject Reason) { switch (Reason) { - case NoSymbolFound: + case ReasonToReject::NoSymbolFound: return "there is no symbol at the given location"; - case NoIndexProvided: + case ReasonToReject::NoIndexProvided: return "symbol may be used in other files (no index available)"; - case UsedOutsideFile: + case ReasonToReject::UsedOutsideFile: return "the symbol is used outside main file"; - case NonIndexable: + case ReasonToReject::NonIndexable: return "symbol may be used in other files (not eligible for indexing)"; - case UnsupportedSymbol: + case ReasonToReject::UnsupportedSymbol: return "symbol is not a supported kind (e.g. namespace, macro)"; case AmbiguousSymbol: return "there are multiple symbols at the given location"; @@ -212,35 +237,14 @@ std::vector findOccurrencesWithinFile(ParsedAST &AST, return Results; } -} // namespace - +// AST-based rename, it renames all occurrences in the main file. llvm::Expected -renameWithinFile(ParsedAST &AST, llvm::StringRef File, Position Pos, - llvm::StringRef NewName, const SymbolIndex *Index) { +renameWithinFile(ParsedAST &AST, const NamedDecl &RenameDecl, + llvm::StringRef NewName) { const SourceManager &SM = AST.getSourceManager(); - SourceLocation SourceLocationBeg = SM.getMacroArgExpandedLocation( - getBeginningOfIdentifier(Pos, SM, AST.getASTContext().getLangOpts())); - // FIXME: renaming macros is not supported yet, the macro-handling code should - // be moved to rename tooling library. 
- if (locateMacroAt(SourceLocationBeg, AST.getPreprocessor())) - return makeError(UnsupportedSymbol); - - auto DeclsUnderCursor = locateDeclAt(AST, SourceLocationBeg); - if (DeclsUnderCursor.empty()) - return makeError(NoSymbolFound); - if (DeclsUnderCursor.size() > 1) - return makeError(AmbiguousSymbol); - - const auto *RenameDecl = llvm::dyn_cast(*DeclsUnderCursor.begin()); - if (!RenameDecl) - return makeError(UnsupportedSymbol); - - if (auto Reject = - renamableWithinFile(*RenameDecl->getCanonicalDecl(), File, Index)) - return makeError(*Reject); tooling::Replacements FilteredChanges; - for (SourceLocation Loc : findOccurrencesWithinFile(AST, *RenameDecl)) { + for (SourceLocation Loc : findOccurrencesWithinFile(AST, RenameDecl)) { SourceLocation RenameLoc = Loc; // We don't rename in any macro bodies, but we allow rename the symbol // spelled in a top-level macro argument in the main file. @@ -265,5 +269,201 @@ renameWithinFile(ParsedAST &AST, llvm::StringRef File, Position Pos, return FilteredChanges; } +Range toRange(const SymbolLocation &L) { + Range R; + R.start.line = L.Start.line(); + R.start.character = L.Start.column(); + R.end.line = L.End.line(); + R.end.character = L.End.column(); + return R; +}; + +// Return all rename occurrences (per the index) outside of the main file, +// grouped by the absolute file path. +llvm::StringMap> +findOccurrencesOutsideFile(const NamedDecl &RenameDecl, + llvm::StringRef MainFile, const SymbolIndex &Index) { + RefsRequest RQuest; + RQuest.IDs.insert(*getSymbolID(&RenameDecl)); + + // Absolute file path => rename ocurrences in that file. 
+ llvm::StringMap> AffectedFiles; + Index.refs(RQuest, [&](const Ref &R) { + if (auto RefFilePath = filePath(R.Location, /*HintFilePath=*/MainFile)) { + if (*RefFilePath != MainFile) + AffectedFiles[*RefFilePath].push_back(toRange(R.Location)); + } + }); + return AffectedFiles; +} + +llvm::Expected> toRangeOffset(const clangd::Range &R, + llvm::StringRef Code) { + auto StartOffset = positionToOffset(Code, R.start); + if (!StartOffset) + return StartOffset.takeError(); + auto EndOffset = positionToOffset(Code, R.end); + if (!EndOffset) + return EndOffset.takeError(); + return std::make_pair(*StartOffset, *EndOffset); +}; + +llvm::Expected buildRenameEdit(llvm::StringRef InitialCode, + const std::vector &Occurrences, + llvm::StringRef NewName) { + tooling::Replacements RenameEdit; + for (const Range &Occurrence : Occurrences) { + // FIXME: !positionToOffset is O(N), optimize it. + auto RangeOffset = toRangeOffset(Occurrence, InitialCode); + if (!RangeOffset) + return RangeOffset.takeError(); + auto ByteLength = RangeOffset->second - RangeOffset->first; + if (auto Err = RenameEdit.add(tooling::Replacement( + InitialCode, RangeOffset->first, ByteLength, NewName))) + return std::move(Err); + } + return Edit(InitialCode, std::move(RenameEdit)); +} + +// Index-based rename, it renames all occurrences outside of the main file. +// +// The cross-file rename is purely based on the index, as we don't want to +// build all ASTs for affected files, which may cause a performance hit. +// We choose to trade off some correctness for performance and scalability. +// +// Clangd builds a dynamic index for all opened files on top of the static +// index of the whole codebase. Dynamic index is up-to-date (respects dirty +// buffers) as long as clangd finishes processing opened files, while static +// index (background index) is relatively stale. We choose the dirty buffers +// as the file content we rename on, and fallback to file content on disk if +// there is no dirty buffer. 
+// +// FIXME: add range patching heuristics to detect staleness of the index, and +// report to users. +// FIXME: our index may return implicit references, which are non-eligitble +// for rename, we should filter out these references. +llvm::Expected renameOutsideFile( + const NamedDecl &RenameDecl, llvm::StringRef MainFilePath, + llvm::StringRef NewName, const SymbolIndex &Index, + llvm::function_ref(PathRef)> GetFileContent) { + auto AffectedFiles = + findOccurrencesOutsideFile(RenameDecl, MainFilePath, Index); + // FIXME: make the limit customizable. + static constexpr size_t MaxLimitFiles = 50; + if (AffectedFiles.size() >= MaxLimitFiles) + return llvm::make_error( + llvm::formatv( + "The number of affected files exceeds the max limit {0}: {1}", + MaxLimitFiles, AffectedFiles.size()), + llvm::inconvertibleErrorCode()); + + FileEdits Results; + for (const auto &FileAndOccurrences : AffectedFiles) { + llvm::StringRef FilePath = FileAndOccurrences.first(); + + auto AffectedFileCode = GetFileContent(FilePath); + if (!AffectedFileCode) { + elog("Fail to read file content: {0}", AffectedFileCode.takeError()); + continue; + } + + auto RenameEdit = buildRenameEdit(*AffectedFileCode, + FileAndOccurrences.getValue(), NewName); + if (!RenameEdit) + return RenameEdit.takeError(); + if (!RenameEdit->Replacements.empty()) + Results.insert({FilePath, std::move(*RenameEdit)}); + } + return Results; +} + +} // namespace + +llvm::Expected rename(const RenameInputs &RInputs) { + ParsedAST &AST = RInputs.AST; + const SourceManager &SM = AST.getSourceManager(); + llvm::StringRef MainFileCode = SM.getBufferData(SM.getMainFileID()); + auto GetFileContent = [&RInputs, + &SM](PathRef AbsPath) -> llvm::Expected { + llvm::Optional DirtyBuffer; + if (RInputs.GetDirtyBuffer && + (DirtyBuffer = RInputs.GetDirtyBuffer(AbsPath))) + return std::move(*DirtyBuffer); + + auto Content = + SM.getFileManager().getVirtualFileSystem().getBufferForFile(AbsPath); + if (!Content) + return 
llvm::createStringError( + llvm::inconvertibleErrorCode(), + llvm::formatv("Fail to open file {0}: {1}", AbsPath, + Content.getError().message())); + if (!*Content) + return llvm::createStringError( + llvm::inconvertibleErrorCode(), + llvm::formatv("Got no buffer for file {0}", AbsPath)); + + return (*Content)->getBuffer().str(); + }; + SourceLocation SourceLocationBeg = + SM.getMacroArgExpandedLocation(getBeginningOfIdentifier( + RInputs.Pos, SM, AST.getASTContext().getLangOpts())); + // FIXME: renaming macros is not supported yet, the macro-handling code should + // be moved to rename tooling library. + if (locateMacroAt(SourceLocationBeg, AST.getPreprocessor())) + return makeError(ReasonToReject::UnsupportedSymbol); + + auto DeclsUnderCursor = locateDeclAt(AST, SourceLocationBeg); + if (DeclsUnderCursor.empty()) + return makeError(ReasonToReject::NoSymbolFound); + if (DeclsUnderCursor.size() > 1) + return makeError(ReasonToReject::AmbiguousSymbol); + + const auto *RenameDecl = llvm::dyn_cast(*DeclsUnderCursor.begin()); + if (!RenameDecl) + return makeError(ReasonToReject::UnsupportedSymbol); + + auto Reject = + renameable(*RenameDecl->getCanonicalDecl(), RInputs.MainFilePath, + RInputs.Index, RInputs.AllowCrossFile); + if (Reject) + return makeError(*Reject); + + // We have two implemenations of the rename: + // - AST-based rename: used for renaming local symbols, e.g. 
variables + // defined in a function body; + // - index-based rename: used for renaming non-local symbols, and not + // feasible for local symbols (as by design our index don't index these + // symbols by design; + // To make cross-file rename work for local symbol, we use a hybrid solution: + // - run AST-based rename on the main file; + // - run index-based rename on other affected files; + auto MainFileRenameEdit = renameWithinFile(AST, *RenameDecl, RInputs.NewName); + if (!MainFileRenameEdit) + return MainFileRenameEdit.takeError(); + + if (!RInputs.AllowCrossFile) { + // within-file rename, just return the main file results. + return FileEdits( + {std::make_pair(RInputs.MainFilePath, + Edit{MainFileCode, std::move(*MainFileRenameEdit)})}); + } + + FileEdits Results; + // renameable safely guards us that at this point we are renaming a local + // symbol if we don't have index, + if (RInputs.Index) { + auto OtherFilesEdits = + renameOutsideFile(*RenameDecl, RInputs.MainFilePath, RInputs.NewName, + *RInputs.Index, GetFileContent); + if (!OtherFilesEdits) + return OtherFilesEdits.takeError(); + Results = std::move(*OtherFilesEdits); + } + // Attach the rename edits for the main file. 
+ Results.try_emplace(RInputs.MainFilePath, MainFileCode, + std::move(*MainFileRenameEdit)); + return Results; +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/refactor/Rename.h b/clang-tools-extra/clangd/refactor/Rename.h index 63a1ffe321508..1427d7042585b 100644 --- a/clang-tools-extra/clangd/refactor/Rename.h +++ b/clang-tools-extra/clangd/refactor/Rename.h @@ -9,7 +9,9 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_REFACTOR_RENAME_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_REFACTOR_RENAME_H +#include "Path.h" #include "Protocol.h" +#include "SourceCode.h" #include "clang/Tooling/Core/Replacement.h" #include "llvm/Support/Error.h" @@ -18,13 +20,32 @@ namespace clangd { class ParsedAST; class SymbolIndex; -/// Renames all occurrences of the symbol at \p Pos to \p NewName. -/// Occurrences outside the current file are not modified. -/// Returns an error if rename a symbol that's used in another file (per the -/// index). -llvm::Expected -renameWithinFile(ParsedAST &AST, llvm::StringRef File, Position Pos, - llvm::StringRef NewName, const SymbolIndex *Index = nullptr); +/// Gets dirty buffer for a given file \p AbsPath. +/// Returns None if there is no dirty buffer for the given file. +using DirtyBufferGetter = + llvm::function_ref(PathRef AbsPath)>; + +struct RenameInputs { + Position Pos; // the position triggering the rename + llvm::StringRef NewName; + + ParsedAST &AST; + llvm::StringRef MainFilePath; + + const SymbolIndex *Index = nullptr; + + bool AllowCrossFile = false; + // When set, used by the rename to get file content for all rename-related + // files. + // If there is no corresponding dirty buffer, we will use the file content + // from disk. + DirtyBufferGetter GetDirtyBuffer = nullptr; +}; + +/// Renames all occurrences of the symbol. +/// If AllowCrossFile is false, returns an error if rename a symbol that's used +/// in another file (per the index). 
+llvm::Expected rename(const RenameInputs &RInputs); } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/refactor/Tweak.h b/clang-tools-extra/clangd/refactor/Tweak.h index de655abd98c7b..69ac4ad612e9d 100644 --- a/clang-tools-extra/clangd/refactor/Tweak.h +++ b/clang-tools-extra/clangd/refactor/Tweak.h @@ -77,9 +77,7 @@ class Tweak { struct Effect { /// A message to be displayed to the user. llvm::Optional ShowMessage; - /// A mapping from file path(the one used for accessing the underlying VFS) - /// to edits. - llvm::StringMap ApplyEdits; + FileEdits ApplyEdits; static Effect showMessage(StringRef S) { Effect E; diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index 2639df31dbe8d..608a2da681342 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -264,6 +264,16 @@ list TweakList{ CommaSeparated, }; +opt CrossFileRename{ + "cross-file-rename", + cat(Features), + desc("Enable cross-file rename feature. 
Note that this feature is " + "experimental and may lead to broken code or incomplete rename " + "results"), + init(false), + Hidden, +}; + opt WorkerThreadsCount{ "j", cat(Misc), @@ -595,6 +605,7 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var } Opts.StaticIndex = StaticIdx.get(); Opts.AsyncThreadsCount = WorkerThreadsCount; + Opts.CrossFileRename = CrossFileRename; clangd::CodeCompleteOptions CCOpts; CCOpts.IncludeIneligibleResults = IncludeIneligibleResults; diff --git a/clang-tools-extra/clangd/unittests/RenameTests.cpp b/clang-tools-extra/clangd/unittests/RenameTests.cpp index 8dedcf579fd33..75b15e735abf7 100644 --- a/clang-tools-extra/clangd/unittests/RenameTests.cpp +++ b/clang-tools-extra/clangd/unittests/RenameTests.cpp @@ -9,8 +9,10 @@ #include "Annotations.h" #include "TestFS.h" #include "TestTU.h" +#include "index/Ref.h" #include "refactor/Rename.h" #include "clang/Tooling/Core/Replacement.h" +#include "llvm/Support/MemoryBuffer.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -18,8 +20,45 @@ namespace clang { namespace clangd { namespace { -MATCHER_P2(RenameRange, Code, Range, "") { - return replacementToEdit(Code, arg).range == Range; +using testing::Eq; +using testing::Pair; +using testing::UnorderedElementsAre; + +// Build a RefSlab from all marked ranges in the annotation. The ranges are +// assumed to associate with the given SymbolName. 
+std::unique_ptr buildRefSlab(const Annotations &Code, + llvm::StringRef SymbolName, + llvm::StringRef Path) { + RefSlab::Builder Builder; + TestTU TU; + TU.HeaderCode = Code.code(); + auto Symbols = TU.headerSymbols(); + const auto &SymbolID = findSymbol(Symbols, SymbolName).ID; + for (const auto &Range : Code.ranges()) { + Ref R; + R.Kind = RefKind::Reference; + R.Location.Start.setLine(Range.start.line); + R.Location.Start.setColumn(Range.start.character); + R.Location.End.setLine(Range.end.line); + R.Location.End.setColumn(Range.end.character); + auto U = URI::create(Path).toString(); + R.Location.FileURI = U.c_str(); + Builder.insert(SymbolID, R); + } + + return std::make_unique(std::move(Builder).build()); +} + +std::vector< + std::pair> +applyEdits(FileEdits FE) { + std::vector> Results; + for (auto &It : FE) + Results.emplace_back( + It.first().str(), + llvm::cantFail(tooling::applyAllReplacements( + It.getValue().InitialCode, It.getValue().Replacements))); + return Results; } // Generates an expected rename result by replacing all ranges in the given @@ -363,11 +402,11 @@ TEST(RenameTest, WithinFileRename) { llvm::StringRef NewName = "abcde"; for (const auto &RenamePos : Code.points()) { auto RenameResult = - renameWithinFile(AST, testPath(TU.Filename), RenamePos, NewName); - ASSERT_TRUE(bool(RenameResult)) << RenameResult.takeError() << T; - auto ApplyResult = llvm::cantFail( - tooling::applyAllReplacements(Code.code(), *RenameResult)); - EXPECT_EQ(expectedResult(Code, NewName), ApplyResult); + rename({RenamePos, NewName, AST, testPath(TU.Filename)}); + ASSERT_TRUE(bool(RenameResult)) << RenameResult.takeError(); + ASSERT_EQ(1u, RenameResult->size()); + EXPECT_EQ(applyEdits(std::move(*RenameResult)).front().second, + expectedResult(Code, NewName)); } } } @@ -480,23 +519,23 @@ TEST(RenameTest, Renameable) { } auto AST = TU.build(); llvm::StringRef NewName = "dummyNewName"; - auto Results = renameWithinFile(AST, testPath(TU.Filename), T.point(), - NewName, 
Case.Index); + auto Results = + rename({T.point(), NewName, AST, testPath(TU.Filename), Case.Index}); bool WantRename = true; if (T.ranges().empty()) WantRename = false; if (!WantRename) { assert(Case.ErrorMessage && "Error message must be set!"); EXPECT_FALSE(Results) - << "expected renameWithinFile returned an error: " << T.code(); + << "expected rename returned an error: " << T.code(); auto ActualMessage = llvm::toString(Results.takeError()); EXPECT_THAT(ActualMessage, testing::HasSubstr(Case.ErrorMessage)); } else { - EXPECT_TRUE(bool(Results)) << "renameWithinFile returned an error: " + EXPECT_TRUE(bool(Results)) << "rename returned an error: " << llvm::toString(Results.takeError()); - auto ApplyResult = - llvm::cantFail(tooling::applyAllReplacements(T.code(), *Results)); - EXPECT_EQ(expectedResult(T, NewName), ApplyResult); + ASSERT_EQ(1u, Results->size()); + EXPECT_EQ(applyEdits(std::move(*Results)).front().second, + expectedResult(T, NewName)); } } } @@ -522,11 +561,81 @@ TEST(RenameTest, MainFileReferencesOnly) { llvm::StringRef NewName = "abcde"; auto RenameResult = - renameWithinFile(AST, testPath(TU.Filename), Code.point(), NewName); + rename({Code.point(), NewName, AST, testPath(TU.Filename)}); ASSERT_TRUE(bool(RenameResult)) << RenameResult.takeError() << Code.point(); - auto ApplyResult = - llvm::cantFail(tooling::applyAllReplacements(Code.code(), *RenameResult)); - EXPECT_EQ(expectedResult(Code, NewName), ApplyResult); + ASSERT_EQ(1u, RenameResult->size()); + EXPECT_EQ(applyEdits(std::move(*RenameResult)).front().second, + expectedResult(Code, NewName)); +} + +TEST(RenameTests, CrossFile) { + Annotations FooCode("class [[Foo]] {};"); + std::string FooPath = testPath("foo.cc"); + Annotations FooDirtyBuffer("class [[Foo]] {};\n// this is dirty buffer"); + Annotations BarCode("void [[Bar]]() {}"); + std::string BarPath = testPath("bar.cc"); + // Build the index, the index has "Foo" references from foo.cc and "Bar" + // references from bar.cc. 
+ FileSymbols FSymbols; + FSymbols.update(FooPath, nullptr, buildRefSlab(FooCode, "Foo", FooPath), + nullptr, false); + FSymbols.update(BarPath, nullptr, buildRefSlab(BarCode, "Bar", BarPath), + nullptr, false); + auto Index = FSymbols.buildIndex(IndexType::Light); + + Annotations MainCode("class [[Fo^o]] {};"); + auto MainFilePath = testPath("main.cc"); + // Dirty buffer for foo.cc. + auto GetDirtyBuffer = [&](PathRef Path) -> llvm::Optional { + if (Path == FooPath) + return FooDirtyBuffer.code().str(); + return llvm::None; + }; + + // Run rename on Foo, there is a dirty buffer for foo.cc, rename should + // respect the dirty buffer. + TestTU TU = TestTU::withCode(MainCode.code()); + auto AST = TU.build(); + llvm::StringRef NewName = "newName"; + auto Results = rename({MainCode.point(), NewName, AST, MainFilePath, + Index.get(), /*CrossFile=*/true, GetDirtyBuffer}); + ASSERT_TRUE(bool(Results)) << Results.takeError(); + EXPECT_THAT( + applyEdits(std::move(*Results)), + UnorderedElementsAre( + Pair(Eq(FooPath), Eq(expectedResult(FooDirtyBuffer, NewName))), + Pair(Eq(MainFilePath), Eq(expectedResult(MainCode, NewName))))); + + // Run rename on Bar, there is no dirty buffer for the affected file bar.cc, + // so we should read file content from VFS. + MainCode = Annotations("void [[Bar]]() { [[B^ar]](); }"); + TU = TestTU::withCode(MainCode.code()); + // Set a file "bar.cc" on disk. 
+ TU.AdditionalFiles["bar.cc"] = BarCode.code(); + AST = TU.build(); + Results = rename({MainCode.point(), NewName, AST, MainFilePath, Index.get(), + /*CrossFile=*/true, GetDirtyBuffer}); + ASSERT_TRUE(bool(Results)) << Results.takeError(); + EXPECT_THAT( + applyEdits(std::move(*Results)), + UnorderedElementsAre( + Pair(Eq(BarPath), Eq(expectedResult(BarCode, NewName))), + Pair(Eq(MainFilePath), Eq(expectedResult(MainCode, NewName))))); +} + +TEST(CrossFileRenameTests, CrossFileOnLocalSymbol) { + // cross-file rename should work for function-local symbols, even there is no + // index provided. + Annotations Code("void f(int [[abc]]) { [[a^bc]] = 3; }"); + auto TU = TestTU::withCode(Code.code()); + auto Path = testPath(TU.Filename); + auto AST = TU.build(); + llvm::StringRef NewName = "newName"; + auto Results = rename({Code.point(), NewName, AST, Path}); + ASSERT_TRUE(bool(Results)) << Results.takeError(); + EXPECT_THAT( + applyEdits(std::move(*Results)), + UnorderedElementsAre(Pair(Eq(Path), Eq(expectedResult(Code, NewName))))); } } // namespace diff --git a/clang-tools-extra/clangd/unittests/SyncAPI.cpp b/clang-tools-extra/clangd/unittests/SyncAPI.cpp index 812fa7a0f2ecb..085eacd42fee3 100644 --- a/clang-tools-extra/clangd/unittests/SyncAPI.cpp +++ b/clang-tools-extra/clangd/unittests/SyncAPI.cpp @@ -96,10 +96,9 @@ runFindDocumentHighlights(ClangdServer &Server, PathRef File, Position Pos) { return std::move(*Result); } -llvm::Expected> runRename(ClangdServer &Server, - PathRef File, Position Pos, - llvm::StringRef NewName) { - llvm::Optional>> Result; +llvm::Expected runRename(ClangdServer &Server, PathRef File, + Position Pos, llvm::StringRef NewName) { + llvm::Optional> Result; Server.rename(File, Pos, NewName, /*WantFormat=*/true, capture(Result)); return std::move(*Result); } diff --git a/clang-tools-extra/clangd/unittests/SyncAPI.h b/clang-tools-extra/clangd/unittests/SyncAPI.h index 5ffed1fbb120c..55a538ef6a977 100644 --- 
a/clang-tools-extra/clangd/unittests/SyncAPI.h +++ b/clang-tools-extra/clangd/unittests/SyncAPI.h @@ -38,8 +38,8 @@ runLocateSymbolAt(ClangdServer &Server, PathRef File, Position Pos); llvm::Expected> runFindDocumentHighlights(ClangdServer &Server, PathRef File, Position Pos); -llvm::Expected> -runRename(ClangdServer &Server, PathRef File, Position Pos, StringRef NewName); +llvm::Expected runRename(ClangdServer &Server, PathRef File, + Position Pos, StringRef NewName); std::string runDumpAST(ClangdServer &Server, PathRef File); From a2601a4116f6024b45fc6fbbbb53777d2f6a3cd3 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 26 Nov 2019 10:16:06 +0100 Subject: [PATCH 012/591] clang-format-vs : Fix typo NUGET_EXE_DIR on README Match with the CMake variable. Patch by empty2fill! Differential revision: https://reviews.llvm.org/D70632 --- clang/tools/clang-format-vs/README.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/tools/clang-format-vs/README.txt b/clang/tools/clang-format-vs/README.txt index 84e0b451f018d..2cac5b9af9e3c 100644 --- a/clang/tools/clang-format-vs/README.txt +++ b/clang/tools/clang-format-vs/README.txt @@ -10,12 +10,12 @@ the following CMake vars: - BUILD_CLANG_FORMAT_VS_PLUGIN=ON -- NUGET_EXE_PATH=path/to/nuget_dir (unless nuget.exe is already available in PATH) +- NUGET_EXE_DIR=path/to/nuget_dir (unless nuget.exe is already available in PATH) example: cd /d C:\code\llvm mkdir build & cd build - cmake -DBUILD_CLANG_FORMAT_VS_PLUGIN=ON -DNUGET_EXE_PATH=C:\nuget .. + cmake -DBUILD_CLANG_FORMAT_VS_PLUGIN=ON -DNUGET_EXE_DIR=C:\nuget .. Once LLVM.sln is generated, build the clang_format_vsix target, which will build ClangFormat.sln, the C# extension application. From 78ad22e0cc6390fcd44b2b7b5132f1b960ff975d Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Fri, 11 Oct 2019 13:46:47 +0100 Subject: [PATCH 013/591] Recommit ARM-NEON: make type modifiers orthogonal and allow multiple modifiers. 
The modifier system used to mutate types on NEON intrinsic definitions had a separate letter for all kinds of transformations that might be needed, and we were quite quickly running out of letters to use. This patch converts to a much smaller set of orthogonal modifiers that can be applied together to achieve the desired effect. When merging with downstream it is likely to cause a conflict with any local modifications to the .td files. There is a new script in utils/convert_arm_neon.py that was used to convert all .td definitions and I would suggest running it on the last downstream version of those files before this commit rather than resolving conflicts manually. The original version broke vcreate_* because it became a macro and didn't apply the normal integer promotion rules before bitcasting to a vector. This adds a temporary. --- clang/include/clang/Basic/arm_fp16.td | 166 +- clang/include/clang/Basic/arm_neon.td | 1433 +++++++++--------- clang/include/clang/Basic/arm_neon_incl.td | 69 +- clang/test/CodeGen/aarch64-neon-intrinsics.c | 4 - clang/test/CodeGen/arm_neon_intrinsics.c | 7 + clang/utils/TableGen/NeonEmitter.cpp | 452 ++---- clang/utils/convert_arm_neon.py | 172 +++ 7 files changed, 1173 insertions(+), 1130 deletions(-) create mode 100644 clang/utils/convert_arm_neon.py diff --git a/clang/include/clang/Basic/arm_fp16.td b/clang/include/clang/Basic/arm_fp16.td index bb9873efac853..79cd16233c104 100644 --- a/clang/include/clang/Basic/arm_fp16.td +++ b/clang/include/clang/Basic/arm_fp16.td @@ -17,118 +17,118 @@ include "arm_neon_incl.td" let ArchGuard = "defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)" in { // Negate - def VNEGSH : SInst<"vneg", "ss", "Sh">; + def VNEGSH : SInst<"vneg", "11", "Sh">; // Reciprocal/Sqrt - def SCALAR_FRECPSH : IInst<"vrecps", "sss", "Sh">; - def FSQRTSH : SInst<"vsqrt", "ss", "Sh">; - def SCALAR_FRSQRTSH : IInst<"vrsqrts", "sss", "Sh">; + def SCALAR_FRECPSH : IInst<"vrecps", "111", "Sh">; + def FSQRTSH 
: SInst<"vsqrt", "11", "Sh">; + def SCALAR_FRSQRTSH : IInst<"vrsqrts", "111", "Sh">; // Reciprocal Estimate - def SCALAR_FRECPEH : IInst<"vrecpe", "ss", "Sh">; + def SCALAR_FRECPEH : IInst<"vrecpe", "11", "Sh">; // Reciprocal Exponent - def SCALAR_FRECPXH : IInst<"vrecpx", "ss", "Sh">; + def SCALAR_FRECPXH : IInst<"vrecpx", "11", "Sh">; // Reciprocal Square Root Estimate - def SCALAR_FRSQRTEH : IInst<"vrsqrte", "ss", "Sh">; + def SCALAR_FRSQRTEH : IInst<"vrsqrte", "11", "Sh">; // Rounding - def FRINTZ_S64H : SInst<"vrnd", "ss", "Sh">; - def FRINTA_S64H : SInst<"vrnda", "ss", "Sh">; - def FRINTI_S64H : SInst<"vrndi", "ss", "Sh">; - def FRINTM_S64H : SInst<"vrndm", "ss", "Sh">; - def FRINTN_S64H : SInst<"vrndn", "ss", "Sh">; - def FRINTP_S64H : SInst<"vrndp", "ss", "Sh">; - def FRINTX_S64H : SInst<"vrndx", "ss", "Sh">; + def FRINTZ_S64H : SInst<"vrnd", "11", "Sh">; + def FRINTA_S64H : SInst<"vrnda", "11", "Sh">; + def FRINTI_S64H : SInst<"vrndi", "11", "Sh">; + def FRINTM_S64H : SInst<"vrndm", "11", "Sh">; + def FRINTN_S64H : SInst<"vrndn", "11", "Sh">; + def FRINTP_S64H : SInst<"vrndp", "11", "Sh">; + def FRINTX_S64H : SInst<"vrndx", "11", "Sh">; // Conversion - def SCALAR_SCVTFSH : SInst<"vcvth_f16", "Ys", "sUs">; - def SCALAR_SCVTFSH1 : SInst<"vcvth_f16", "Ys", "iUi">; - def SCALAR_SCVTFSH2 : SInst<"vcvth_f16", "Ys", "lUl">; - def SCALAR_FCVTZSH : SInst<"vcvt_s16", "$s", "Sh">; - def SCALAR_FCVTZSH1 : SInst<"vcvt_s32", "Is", "Sh">; - def SCALAR_FCVTZSH2 : SInst<"vcvt_s64", "Ls", "Sh">; - def SCALAR_FCVTZUH : SInst<"vcvt_u16", "bs", "Sh">; - def SCALAR_FCVTZUH1 : SInst<"vcvt_u32", "Us", "Sh">; - def SCALAR_FCVTZUH2 : SInst<"vcvt_u64", "Os", "Sh">; - def SCALAR_FCVTASH : SInst<"vcvta_s16", "$s", "Sh">; - def SCALAR_FCVTASH1 : SInst<"vcvta_s32", "Is", "Sh">; - def SCALAR_FCVTASH2 : SInst<"vcvta_s64", "Ls", "Sh">; - def SCALAR_FCVTAUH : SInst<"vcvta_u16", "bs", "Sh">; - def SCALAR_FCVTAUH1 : SInst<"vcvta_u32", "Us", "Sh">; - def SCALAR_FCVTAUH2 : SInst<"vcvta_u64", 
"Os", "Sh">; - def SCALAR_FCVTMSH : SInst<"vcvtm_s16", "$s", "Sh">; - def SCALAR_FCVTMSH1 : SInst<"vcvtm_s32", "Is", "Sh">; - def SCALAR_FCVTMSH2 : SInst<"vcvtm_s64", "Ls", "Sh">; - def SCALAR_FCVTMUH : SInst<"vcvtm_u16", "bs", "Sh">; - def SCALAR_FCVTMUH1 : SInst<"vcvtm_u32", "Us", "Sh">; - def SCALAR_FCVTMUH2 : SInst<"vcvtm_u64", "Os", "Sh">; - def SCALAR_FCVTNSH : SInst<"vcvtn_s16", "$s", "Sh">; - def SCALAR_FCVTNSH1 : SInst<"vcvtn_s32", "Is", "Sh">; - def SCALAR_FCVTNSH2 : SInst<"vcvtn_s64", "Ls", "Sh">; - def SCALAR_FCVTNUH : SInst<"vcvtn_u16", "bs", "Sh">; - def SCALAR_FCVTNUH1 : SInst<"vcvtn_u32", "Us", "Sh">; - def SCALAR_FCVTNUH2 : SInst<"vcvtn_u64", "Os", "Sh">; - def SCALAR_FCVTPSH : SInst<"vcvtp_s16", "$s", "Sh">; - def SCALAR_FCVTPSH1 : SInst<"vcvtp_s32", "Is", "Sh">; - def SCALAR_FCVTPSH2 : SInst<"vcvtp_s64", "Ls", "Sh">; - def SCALAR_FCVTPUH : SInst<"vcvtp_u16", "bs", "Sh">; - def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "Us", "Sh">; - def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "Os", "Sh">; + def SCALAR_SCVTFSH : SInst<"vcvth_f16", "(1F)(1!)", "sUs">; + def SCALAR_SCVTFSH1 : SInst<"vcvth_f16", "(1F<)(1!)", "iUi">; + def SCALAR_SCVTFSH2 : SInst<"vcvth_f16", "(1F<<)(1!)", "lUl">; + def SCALAR_FCVTZSH : SInst<"vcvt_s16", "(1S)1", "Sh">; + def SCALAR_FCVTZSH1 : SInst<"vcvt_s32", "(1S>)1", "Sh">; + def SCALAR_FCVTZSH2 : SInst<"vcvt_s64", "(1S>>)1", "Sh">; + def SCALAR_FCVTZUH : SInst<"vcvt_u16", "(1U)1", "Sh">; + def SCALAR_FCVTZUH1 : SInst<"vcvt_u32", "(1U>)1", "Sh">; + def SCALAR_FCVTZUH2 : SInst<"vcvt_u64", "(1U>>)1", "Sh">; + def SCALAR_FCVTASH : SInst<"vcvta_s16", "(1S)1", "Sh">; + def SCALAR_FCVTASH1 : SInst<"vcvta_s32", "(1S>)1", "Sh">; + def SCALAR_FCVTASH2 : SInst<"vcvta_s64", "(1S>>)1", "Sh">; + def SCALAR_FCVTAUH : SInst<"vcvta_u16", "(1U)1", "Sh">; + def SCALAR_FCVTAUH1 : SInst<"vcvta_u32", "(1U>)1", "Sh">; + def SCALAR_FCVTAUH2 : SInst<"vcvta_u64", "(1U>>)1", "Sh">; + def SCALAR_FCVTMSH : SInst<"vcvtm_s16", "(1S)1", "Sh">; + def SCALAR_FCVTMSH1 : 
SInst<"vcvtm_s32", "(1S>)1", "Sh">; + def SCALAR_FCVTMSH2 : SInst<"vcvtm_s64", "(1S>>)1", "Sh">; + def SCALAR_FCVTMUH : SInst<"vcvtm_u16", "(1U)1", "Sh">; + def SCALAR_FCVTMUH1 : SInst<"vcvtm_u32", "(1U>)1", "Sh">; + def SCALAR_FCVTMUH2 : SInst<"vcvtm_u64", "(1U>>)1", "Sh">; + def SCALAR_FCVTNSH : SInst<"vcvtn_s16", "(1S)1", "Sh">; + def SCALAR_FCVTNSH1 : SInst<"vcvtn_s32", "(1S>)1", "Sh">; + def SCALAR_FCVTNSH2 : SInst<"vcvtn_s64", "(1S>>)1", "Sh">; + def SCALAR_FCVTNUH : SInst<"vcvtn_u16", "(1U)1", "Sh">; + def SCALAR_FCVTNUH1 : SInst<"vcvtn_u32", "(1U>)1", "Sh">; + def SCALAR_FCVTNUH2 : SInst<"vcvtn_u64", "(1U>>)1", "Sh">; + def SCALAR_FCVTPSH : SInst<"vcvtp_s16", "(1S)1", "Sh">; + def SCALAR_FCVTPSH1 : SInst<"vcvtp_s32", "(1S>)1", "Sh">; + def SCALAR_FCVTPSH2 : SInst<"vcvtp_s64", "(1S>>)1", "Sh">; + def SCALAR_FCVTPUH : SInst<"vcvtp_u16", "(1U)1", "Sh">; + def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "(1U>)1", "Sh">; + def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "(1U>>)1", "Sh">; let isVCVT_N = 1 in { - def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "Ysi", "sUs">; - def SCALAR_SCVTFSH1O: SInst<"vcvth_n_f16", "Ysi", "iUi">; - def SCALAR_SCVTFSH2O: SInst<"vcvth_n_f16", "Ysi", "lUl">; - def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "$si", "Sh">; - def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "Isi", "Sh">; - def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "Lsi", "Sh">; - def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "bsi", "Sh">; - def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "Usi", "Sh">; - def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "Osi", "Sh">; + def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "(1F)(1!)I", "sUs">; + def SCALAR_SCVTFSH1O: SInst<"vcvth_n_f16", "(1F<)(1!)I", "iUi">; + def SCALAR_SCVTFSH2O: SInst<"vcvth_n_f16", "(1F<<)(1!)I", "lUl">; + def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "(1S)1I", "Sh">; + def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "(1S>)1I", "Sh">; + def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "(1S>>)1I", "Sh">; + def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "(1U)1I", "Sh">; + def 
SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "(1U>)1I", "Sh">; + def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "(1U>>)1I", "Sh">; } // Comparison - def SCALAR_CMEQRH : SInst<"vceq", "bss", "Sh">; - def SCALAR_CMEQZH : SInst<"vceqz", "bs", "Sh">; - def SCALAR_CMGERH : SInst<"vcge", "bss", "Sh">; - def SCALAR_CMGEZH : SInst<"vcgez", "bs", "Sh">; - def SCALAR_CMGTRH : SInst<"vcgt", "bss", "Sh">; - def SCALAR_CMGTZH : SInst<"vcgtz", "bs", "Sh">; - def SCALAR_CMLERH : SInst<"vcle", "bss", "Sh">; - def SCALAR_CMLEZH : SInst<"vclez", "bs", "Sh">; - def SCALAR_CMLTH : SInst<"vclt", "bss", "Sh">; - def SCALAR_CMLTZH : SInst<"vcltz", "bs", "Sh">; + def SCALAR_CMEQRH : SInst<"vceq", "(1U)11", "Sh">; + def SCALAR_CMEQZH : SInst<"vceqz", "(1U)1", "Sh">; + def SCALAR_CMGERH : SInst<"vcge", "(1U)11", "Sh">; + def SCALAR_CMGEZH : SInst<"vcgez", "(1U)1", "Sh">; + def SCALAR_CMGTRH : SInst<"vcgt", "(1U)11", "Sh">; + def SCALAR_CMGTZH : SInst<"vcgtz", "(1U)1", "Sh">; + def SCALAR_CMLERH : SInst<"vcle", "(1U)11", "Sh">; + def SCALAR_CMLEZH : SInst<"vclez", "(1U)1", "Sh">; + def SCALAR_CMLTH : SInst<"vclt", "(1U)11", "Sh">; + def SCALAR_CMLTZH : SInst<"vcltz", "(1U)1", "Sh">; // Absolute Compare Mask Greater Than Or Equal - def SCALAR_FACGEH : IInst<"vcage", "bss", "Sh">; - def SCALAR_FACLEH : IInst<"vcale", "bss", "Sh">; + def SCALAR_FACGEH : IInst<"vcage", "(1U)11", "Sh">; + def SCALAR_FACLEH : IInst<"vcale", "(1U)11", "Sh">; // Absolute Compare Mask Greater Than - def SCALAR_FACGT : IInst<"vcagt", "bss", "Sh">; - def SCALAR_FACLT : IInst<"vcalt", "bss", "Sh">; + def SCALAR_FACGT : IInst<"vcagt", "(1U)11", "Sh">; + def SCALAR_FACLT : IInst<"vcalt", "(1U)11", "Sh">; // Scalar Absolute Value - def SCALAR_ABSH : SInst<"vabs", "ss", "Sh">; + def SCALAR_ABSH : SInst<"vabs", "11", "Sh">; // Scalar Absolute Difference - def SCALAR_ABDH: IInst<"vabd", "sss", "Sh">; + def SCALAR_ABDH: IInst<"vabd", "111", "Sh">; // Add/Sub - def VADDSH : SInst<"vadd", "sss", "Sh">; - def VSUBHS : SInst<"vsub", "sss", 
"Sh">; + def VADDSH : SInst<"vadd", "111", "Sh">; + def VSUBHS : SInst<"vsub", "111", "Sh">; // Max/Min - def VMAXHS : SInst<"vmax", "sss", "Sh">; - def VMINHS : SInst<"vmin", "sss", "Sh">; - def FMAXNMHS : SInst<"vmaxnm", "sss", "Sh">; - def FMINNMHS : SInst<"vminnm", "sss", "Sh">; + def VMAXHS : SInst<"vmax", "111", "Sh">; + def VMINHS : SInst<"vmin", "111", "Sh">; + def FMAXNMHS : SInst<"vmaxnm", "111", "Sh">; + def FMINNMHS : SInst<"vminnm", "111", "Sh">; // Multiplication/Division - def VMULHS : SInst<"vmul", "sss", "Sh">; - def MULXHS : SInst<"vmulx", "sss", "Sh">; - def FDIVHS : SInst<"vdiv", "sss", "Sh">; + def VMULHS : SInst<"vmul", "111", "Sh">; + def MULXHS : SInst<"vmulx", "111", "Sh">; + def FDIVHS : SInst<"vdiv", "111", "Sh">; // Vector fused multiply-add operations - def VFMAHS : SInst<"vfma", "ssss", "Sh">; - def VFMSHS : SInst<"vfms", "ssss", "Sh">; + def VFMAHS : SInst<"vfma", "1111", "Sh">; + def VFMSHS : SInst<"vfms", "1111", "Sh">; } diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td index 127c5af97ce67..b5e395c8103f1 100644 --- a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -109,7 +109,8 @@ def OP_OR : Op<(op "|", $p0, $p1)>; def OP_XOR : Op<(op "^", $p0, $p1)>; def OP_ANDN : Op<(op "&", $p0, (op "~", $p1))>; def OP_ORN : Op<(op "|", $p0, (op "~", $p1))>; -def OP_CAST : Op<(cast "R", $p0)>; +def OP_CAST : LOp<[(save_temp $promote, $p0), + (cast "R", $promote)]>; def OP_HI : Op<(shuffle $p0, $p0, (highhalf mask0))>; def OP_LO : Op<(shuffle $p0, $p0, (lowhalf mask0))>; def OP_CONC : Op<(shuffle $p0, $p1, (add mask0, mask1))>; @@ -226,240 +227,240 @@ def OP_FMLSL_LN_Hi : Op<(call "vfmlsl_high", $p0, $p1, //////////////////////////////////////////////////////////////////////////////// // E.3.1 Addition -def VADD : IOpInst<"vadd", "ddd", +def VADD : IOpInst<"vadd", "...", "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUl", OP_ADD>; -def VADDL : SOpInst<"vaddl", "wdd", "csiUcUsUi", 
OP_ADDL>;
-def VADDW : SOpInst<"vaddw", "wwd", "csiUcUsUi", OP_ADDW>;
-def VHADD : SInst<"vhadd", "ddd", "csiUcUsUiQcQsQiQUcQUsQUi">;
-def VRHADD : SInst<"vrhadd", "ddd", "csiUcUsUiQcQsQiQUcQUsQUi">;
-def VQADD : SInst<"vqadd", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VADDHN : IInst<"vaddhn", "hkk", "silUsUiUl">;
-def VRADDHN : IInst<"vraddhn", "hkk", "silUsUiUl">;
+def VADDL : SOpInst<"vaddl", "(>Q)..", "csiUcUsUi", OP_ADDL>;
+def VADDW : SOpInst<"vaddw", "(>Q)(>Q).", "csiUcUsUi", OP_ADDW>;
+def VHADD : SInst<"vhadd", "...", "csiUcUsUiQcQsQiQUcQUsQUi">;
+def VRHADD : SInst<"vrhadd", "...", "csiUcUsUiQcQsQiQUcQUsQUi">;
+def VQADD : SInst<"vqadd", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
+def VADDHN : IInst<"vaddhn", "<QQ", "silUsUiUl">;
+def VRADDHN : IInst<"vraddhn", "<QQ", "silUsUiUl">;

////////////////////////////////////////////////////////////////////////////////
// E.3.2 Multiplication
-def VMUL : IOpInst<"vmul", "ddd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MUL>;
-def VMULP : SInst<"vmul", "ddd", "PcQPc">;
-def VMLA : IOpInst<"vmla", "dddd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MLA>;
-def VMLAL : SOpInst<"vmlal", "wwdd", "csiUcUsUi", OP_MLAL>;
-def VMLS : IOpInst<"vmls", "dddd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MLS>;
-def VMLSL : SOpInst<"vmlsl", "wwdd", "csiUcUsUi", OP_MLSL>;
-def VQDMULH : SInst<"vqdmulh", "ddd", "siQsQi">;
-def VQRDMULH : SInst<"vqrdmulh", "ddd", "siQsQi">;
+def VMUL : IOpInst<"vmul", "...", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MUL>;
+def VMULP : SInst<"vmul", "...", "PcQPc">;
+def VMLA : IOpInst<"vmla", "....", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MLA>;
+def VMLAL : SOpInst<"vmlal", "(>Q)(>Q)..", "csiUcUsUi", OP_MLAL>;
+def VMLS : IOpInst<"vmls", "....", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MLS>;
+def VMLSL : SOpInst<"vmlsl", "(>Q)(>Q)..", "csiUcUsUi", OP_MLSL>;
+def VQDMULH : SInst<"vqdmulh", "...", "siQsQi">;
+def VQRDMULH : SInst<"vqrdmulh", "...", "siQsQi">;
 let ArchGuard = "defined(__ARM_FEATURE_QRDMX)" in {
-def VQRDMLAH : SOpInst<"vqrdmlah", "dddd", "siQsQi",
OP_QRDMLAH>;
-def VQRDMLSH : SOpInst<"vqrdmlsh", "dddd", "siQsQi", OP_QRDMLSH>;
+def VQRDMLAH : SOpInst<"vqrdmlah", "....", "siQsQi", OP_QRDMLAH>;
+def VQRDMLSH : SOpInst<"vqrdmlsh", "....", "siQsQi", OP_QRDMLSH>;
 }
-def VQDMLAL : SInst<"vqdmlal", "wwdd", "si">;
-def VQDMLSL : SInst<"vqdmlsl", "wwdd", "si">;
-def VMULL : SInst<"vmull", "wdd", "csiUcUsUiPc">;
-def VQDMULL : SInst<"vqdmull", "wdd", "si">;
+def VQDMLAL : SInst<"vqdmlal", "(>Q)(>Q)..", "si">;
+def VQDMLSL : SInst<"vqdmlsl", "(>Q)(>Q)..", "si">;
+def VMULL : SInst<"vmull", "(>Q)..", "csiUcUsUiPc">;
+def VQDMULL : SInst<"vqdmull", "(>Q)..", "si">;

////////////////////////////////////////////////////////////////////////////////
// E.3.3 Subtraction
-def VSUB : IOpInst<"vsub", "ddd",
+def VSUB : IOpInst<"vsub", "...",
 "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUl", OP_SUB>;
-def VSUBL : SOpInst<"vsubl", "wdd", "csiUcUsUi", OP_SUBL>;
-def VSUBW : SOpInst<"vsubw", "wwd", "csiUcUsUi", OP_SUBW>;
-def VQSUB : SInst<"vqsub", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VHSUB : SInst<"vhsub", "ddd", "csiUcUsUiQcQsQiQUcQUsQUi">;
-def VSUBHN : IInst<"vsubhn", "hkk", "silUsUiUl">;
-def VRSUBHN : IInst<"vrsubhn", "hkk", "silUsUiUl">;
+def VSUBL : SOpInst<"vsubl", "(>Q)..", "csiUcUsUi", OP_SUBL>;
+def VSUBW : SOpInst<"vsubw", "(>Q)(>Q).", "csiUcUsUi", OP_SUBW>;
+def VQSUB : SInst<"vqsub", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
+def VHSUB : SInst<"vhsub", "...", "csiUcUsUiQcQsQiQUcQUsQUi">;
+def VSUBHN : IInst<"vsubhn", "<QQ", "silUsUiUl">;
+def VRSUBHN : IInst<"vrsubhn", "<QQ", "silUsUiUl">;

////////////////////////////////////////////////////////////////////////////////
// E.3.4 Comparison
-def VCEQ : IOpInst<"vceq", "udd", "csifUcUsUiPcQcQsQiQfQUcQUsQUiQPc", OP_EQ>;
-def VCGE : SOpInst<"vcge", "udd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_GE>;
+def VCEQ : IOpInst<"vceq", "U..", "csifUcUsUiPcQcQsQiQfQUcQUsQUiQPc", OP_EQ>;
+def VCGE : SOpInst<"vcge", "U..", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_GE>;
 let InstName = "vcge" in
-def VCLE : SOpInst<"vcle", "udd",
"csifUcUsUiQcQsQiQfQUcQUsQUi", OP_LE>; -def VCGT : SOpInst<"vcgt", "udd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_GT>; +def VCLE : SOpInst<"vcle", "U..", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_LE>; +def VCGT : SOpInst<"vcgt", "U..", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_GT>; let InstName = "vcgt" in -def VCLT : SOpInst<"vclt", "udd", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_LT>; +def VCLT : SOpInst<"vclt", "U..", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_LT>; let InstName = "vacge" in { -def VCAGE : IInst<"vcage", "udd", "fQf">; -def VCALE : IInst<"vcale", "udd", "fQf">; +def VCAGE : IInst<"vcage", "U..", "fQf">; +def VCALE : IInst<"vcale", "U..", "fQf">; } let InstName = "vacgt" in { -def VCAGT : IInst<"vcagt", "udd", "fQf">; -def VCALT : IInst<"vcalt", "udd", "fQf">; +def VCAGT : IInst<"vcagt", "U..", "fQf">; +def VCALT : IInst<"vcalt", "U..", "fQf">; } -def VTST : WInst<"vtst", "udd", "csiUcUsUiPcPsQcQsQiQUcQUsQUiQPcQPs">; +def VTST : WInst<"vtst", "U..", "csiUcUsUiPcPsQcQsQiQUcQUsQUiQPcQPs">; //////////////////////////////////////////////////////////////////////////////// // E.3.5 Absolute Difference -def VABD : SInst<"vabd", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQf">; -def VABDL : SOpInst<"vabdl", "wdd", "csiUcUsUi", OP_ABDL>; -def VABA : SOpInst<"vaba", "dddd", "csiUcUsUiQcQsQiQUcQUsQUi", OP_ABA>; -def VABAL : SOpInst<"vabal", "wwdd", "csiUcUsUi", OP_ABAL>; +def VABD : SInst<"vabd", "...", "csiUcUsUifQcQsQiQUcQUsQUiQf">; +def VABDL : SOpInst<"vabdl", "(>Q)..", "csiUcUsUi", OP_ABDL>; +def VABA : SOpInst<"vaba", "....", "csiUcUsUiQcQsQiQUcQUsQUi", OP_ABA>; +def VABAL : SOpInst<"vabal", "(>Q)(>Q)..", "csiUcUsUi", OP_ABAL>; //////////////////////////////////////////////////////////////////////////////// // E.3.6 Max/Min -def VMAX : SInst<"vmax", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQf">; -def VMIN : SInst<"vmin", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQf">; +def VMAX : SInst<"vmax", "...", "csiUcUsUifQcQsQiQUcQUsQUiQf">; +def VMIN : SInst<"vmin", "...", "csiUcUsUifQcQsQiQUcQUsQUiQf">; 
//////////////////////////////////////////////////////////////////////////////// // E.3.7 Pairwise Addition -def VPADD : IInst<"vpadd", "ddd", "csiUcUsUif">; -def VPADDL : SInst<"vpaddl", "nd", "csiUcUsUiQcQsQiQUcQUsQUi">; -def VPADAL : SInst<"vpadal", "nnd", "csiUcUsUiQcQsQiQUcQUsQUi">; +def VPADD : IInst<"vpadd", "...", "csiUcUsUif">; +def VPADDL : SInst<"vpaddl", ">.", "csiUcUsUiQcQsQiQUcQUsQUi">; +def VPADAL : SInst<"vpadal", ">>.", "csiUcUsUiQcQsQiQUcQUsQUi">; //////////////////////////////////////////////////////////////////////////////// // E.3.8-9 Folding Max/Min -def VPMAX : SInst<"vpmax", "ddd", "csiUcUsUif">; -def VPMIN : SInst<"vpmin", "ddd", "csiUcUsUif">; +def VPMAX : SInst<"vpmax", "...", "csiUcUsUif">; +def VPMIN : SInst<"vpmin", "...", "csiUcUsUif">; //////////////////////////////////////////////////////////////////////////////// // E.3.10 Reciprocal/Sqrt -def VRECPS : IInst<"vrecps", "ddd", "fQf">; -def VRSQRTS : IInst<"vrsqrts", "ddd", "fQf">; +def VRECPS : IInst<"vrecps", "...", "fQf">; +def VRSQRTS : IInst<"vrsqrts", "...", "fQf">; //////////////////////////////////////////////////////////////////////////////// // E.3.11 Shifts by signed variable -def VSHL : SInst<"vshl", "ddx", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VQSHL : SInst<"vqshl", "ddx", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VRSHL : SInst<"vrshl", "ddx", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VQRSHL : SInst<"vqrshl", "ddx", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VSHL : SInst<"vshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VQSHL : SInst<"vqshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VRSHL : SInst<"vrshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; +def VQRSHL : SInst<"vqrshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; //////////////////////////////////////////////////////////////////////////////// // E.3.12 Shifts by constant let isShift = 1 in { -def VSHR_N : SInst<"vshr_n", "ddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">; -def VSHL_N : IInst<"vshl_n", 
"ddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VRSHR_N : SInst<"vrshr_n", "ddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VSRA_N : SInst<"vsra_n", "dddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VRSRA_N : SInst<"vrsra_n", "dddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VQSHL_N : SInst<"vqshl_n", "ddi", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
-def VQSHLU_N : SInst<"vqshlu_n", "udi", "csilQcQsQiQl">;
-def VSHRN_N : IInst<"vshrn_n", "hki", "silUsUiUl">;
-def VQSHRUN_N : SInst<"vqshrun_n", "eki", "sil">;
-def VQRSHRUN_N : SInst<"vqrshrun_n", "eki", "sil">;
-def VQSHRN_N : SInst<"vqshrn_n", "hki", "silUsUiUl">;
-def VRSHRN_N : IInst<"vrshrn_n", "hki", "silUsUiUl">;
-def VQRSHRN_N : SInst<"vqrshrn_n", "hki", "silUsUiUl">;
-def VSHLL_N : SInst<"vshll_n", "wdi", "csiUcUsUi">;
+def VSHR_N : SInst<"vshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
+def VSHL_N : IInst<"vshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
+def VRSHR_N : SInst<"vrshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
+def VSRA_N : SInst<"vsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
+def VRSRA_N : SInst<"vrsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
+def VQSHL_N : SInst<"vqshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
+def VQSHLU_N : SInst<"vqshlu_n", "U.I", "csilQcQsQiQl">;
+def VSHRN_N : IInst<"vshrn_n", "<QI", "silUsUiUl">;
+def VQSHRUN_N : SInst<"vqshrun_n", "(<U)QI", "sil">;
+def VQRSHRUN_N : SInst<"vqrshrun_n", "(<U)QI", "sil">;
+def VQSHRN_N : SInst<"vqshrn_n", "<QI", "silUsUiUl">;
+def VRSHRN_N : IInst<"vrshrn_n", "<QI", "silUsUiUl">;
+def VQRSHRN_N : SInst<"vqrshrn_n", "<QI", "silUsUiUl">;
+def VSHLL_N : SInst<"vshll_n", "(>Q).I", "csiUcUsUi">;

////////////////////////////////////////////////////////////////////////////////
// E.3.13 Shifts with insert
-def VSRI_N : WInst<"vsri_n", "dddi",
+def VSRI_N : WInst<"vsri_n", "...I",
 "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs">;
-def VSLI_N : WInst<"vsli_n", "dddi",
+def VSLI_N : WInst<"vsli_n", "...I",
 "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs">;
 }
//////////////////////////////////////////////////////////////////////////////// // E.3.14 Loads and stores of a single vector -def VLD1 : WInst<"vld1", "dc", +def VLD1 : WInst<"vld1", ".(c*!)", "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">; -def VLD1_X2 : WInst<"vld1_x2", "2c", +def VLD1_X2 : WInst<"vld1_x2", "2(c*!)", "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">; -def VLD1_X3 : WInst<"vld1_x3", "3c", +def VLD1_X3 : WInst<"vld1_x3", "3(c*!)", "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">; -def VLD1_X4 : WInst<"vld1_x4", "4c", +def VLD1_X4 : WInst<"vld1_x4", "4(c*!)", "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">; -def VLD1_LANE : WInst<"vld1_lane", "dcdi", +def VLD1_LANE : WInst<"vld1_lane", ".(c*!).I", "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">; -def VLD1_DUP : WInst<"vld1_dup", "dc", +def VLD1_DUP : WInst<"vld1_dup", ".(c*!)", "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">; -def VST1 : WInst<"vst1", "vpd", +def VST1 : WInst<"vst1", "v*(.!)", "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">; -def VST1_X2 : WInst<"vst1_x2", "vp2", +def VST1_X2 : WInst<"vst1_x2", "v*(2!)", "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">; -def VST1_X3 : WInst<"vst1_x3", "vp3", +def VST1_X3 : WInst<"vst1_x3", "v*(3!)", "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">; -def VST1_X4 : WInst<"vst1_x4", "vp4", +def VST1_X4 : WInst<"vst1_x4", "v*(4!)", "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">; -def VST1_LANE : WInst<"vst1_lane", "vpdi", +def VST1_LANE : WInst<"vst1_lane", "v*(.!)I", "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">; let ArchGuard = "(__ARM_FP & 2)" in { -def VLD1_F16 : WInst<"vld1", "dc", "hQh">; -def VLD1_X2_F16 : WInst<"vld1_x2", "2c", "hQh">; -def VLD1_X3_F16 : WInst<"vld1_x3", "3c", "hQh">; -def VLD1_X4_F16 : WInst<"vld1_x4", "4c", "hQh">; -def VLD1_LANE_F16 : WInst<"vld1_lane", "dcdi", "hQh">; -def VLD1_DUP_F16 : WInst<"vld1_dup", "dc", "hQh">; -def VST1_F16 : WInst<"vst1", "vpd", "hQh">; -def VST1_X2_F16 : WInst<"vst1_x2", "vp2", "hQh">; 
-def VST1_X3_F16 : WInst<"vst1_x3", "vp3", "hQh">; -def VST1_X4_F16 : WInst<"vst1_x4", "vp4", "hQh">; -def VST1_LANE_F16 : WInst<"vst1_lane", "vpdi", "hQh">; +def VLD1_F16 : WInst<"vld1", ".(c*!)", "hQh">; +def VLD1_X2_F16 : WInst<"vld1_x2", "2(c*!)", "hQh">; +def VLD1_X3_F16 : WInst<"vld1_x3", "3(c*!)", "hQh">; +def VLD1_X4_F16 : WInst<"vld1_x4", "4(c*!)", "hQh">; +def VLD1_LANE_F16 : WInst<"vld1_lane", ".(c*!).I", "hQh">; +def VLD1_DUP_F16 : WInst<"vld1_dup", ".(c*!)", "hQh">; +def VST1_F16 : WInst<"vst1", "v*(.!)", "hQh">; +def VST1_X2_F16 : WInst<"vst1_x2", "v*(2!)", "hQh">; +def VST1_X3_F16 : WInst<"vst1_x3", "v*(3!)", "hQh">; +def VST1_X4_F16 : WInst<"vst1_x4", "v*(4!)", "hQh">; +def VST1_LANE_F16 : WInst<"vst1_lane", "v*(.!)I", "hQh">; } //////////////////////////////////////////////////////////////////////////////// // E.3.15 Loads and stores of an N-element structure -def VLD2 : WInst<"vld2", "2c", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; -def VLD3 : WInst<"vld3", "3c", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; -def VLD4 : WInst<"vld4", "4c", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; -def VLD2_DUP : WInst<"vld2_dup", "2c", +def VLD2 : WInst<"vld2", "2(c*!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; +def VLD3 : WInst<"vld3", "3(c*!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; +def VLD4 : WInst<"vld4", "4(c*!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; +def VLD2_DUP : WInst<"vld2_dup", "2(c*!)", "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">; -def VLD3_DUP : WInst<"vld3_dup", "3c", +def VLD3_DUP : WInst<"vld3_dup", "3(c*!)", "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">; -def VLD4_DUP : WInst<"vld4_dup", "4c", +def VLD4_DUP : WInst<"vld4_dup", "4(c*!)", "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">; -def VLD2_LANE : WInst<"vld2_lane", "2c2i", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; -def VLD3_LANE : WInst<"vld3_lane", "3c3i", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; -def VLD4_LANE : WInst<"vld4_lane", "4c4i", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; 
-def VST2 : WInst<"vst2", "vp2", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; -def VST3 : WInst<"vst3", "vp3", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; -def VST4 : WInst<"vst4", "vp4", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; -def VST2_LANE : WInst<"vst2_lane", "vp2i", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; -def VST3_LANE : WInst<"vst3_lane", "vp3i", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; -def VST4_LANE : WInst<"vst4_lane", "vp4i", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; +def VLD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; +def VLD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; +def VLD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; +def VST2 : WInst<"vst2", "v*(2!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; +def VST3 : WInst<"vst3", "v*(3!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; +def VST4 : WInst<"vst4", "v*(4!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">; +def VST2_LANE : WInst<"vst2_lane", "v*(2!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; +def VST3_LANE : WInst<"vst3_lane", "v*(3!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; +def VST4_LANE : WInst<"vst4_lane", "v*(4!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">; let ArchGuard = "(__ARM_FP & 2)" in { -def VLD2_F16 : WInst<"vld2", "2c", "hQh">; -def VLD3_F16 : WInst<"vld3", "3c", "hQh">; -def VLD4_F16 : WInst<"vld4", "4c", "hQh">; -def VLD2_DUP_F16 : WInst<"vld2_dup", "2c", "hQh">; -def VLD3_DUP_F16 : WInst<"vld3_dup", "3c", "hQh">; -def VLD4_DUP_F16 : WInst<"vld4_dup", "4c", "hQh">; -def VLD2_LANE_F16 : WInst<"vld2_lane", "2c2i", "hQh">; -def VLD3_LANE_F16 : WInst<"vld3_lane", "3c3i", "hQh">; -def VLD4_LANE_F16 : WInst<"vld4_lane", "4c4i", "hQh">; -def VST2_F16 : WInst<"vst2", "vp2", "hQh">; -def VST3_F16 : WInst<"vst3", "vp3", "hQh">; -def VST4_F16 : WInst<"vst4", "vp4", "hQh">; -def VST2_LANE_F16 : WInst<"vst2_lane", "vp2i", "hQh">; -def VST3_LANE_F16 : WInst<"vst3_lane", "vp3i", "hQh">; -def VST4_LANE_F16 : WInst<"vst4_lane", "vp4i", "hQh">; 
+def VLD2_F16 : WInst<"vld2", "2(c*!)", "hQh">; +def VLD3_F16 : WInst<"vld3", "3(c*!)", "hQh">; +def VLD4_F16 : WInst<"vld4", "4(c*!)", "hQh">; +def VLD2_DUP_F16 : WInst<"vld2_dup", "2(c*!)", "hQh">; +def VLD3_DUP_F16 : WInst<"vld3_dup", "3(c*!)", "hQh">; +def VLD4_DUP_F16 : WInst<"vld4_dup", "4(c*!)", "hQh">; +def VLD2_LANE_F16 : WInst<"vld2_lane", "2(c*!)2I", "hQh">; +def VLD3_LANE_F16 : WInst<"vld3_lane", "3(c*!)3I", "hQh">; +def VLD4_LANE_F16 : WInst<"vld4_lane", "4(c*!)4I", "hQh">; +def VST2_F16 : WInst<"vst2", "v*(2!)", "hQh">; +def VST3_F16 : WInst<"vst3", "v*(3!)", "hQh">; +def VST4_F16 : WInst<"vst4", "v*(4!)", "hQh">; +def VST2_LANE_F16 : WInst<"vst2_lane", "v*(2!)I", "hQh">; +def VST3_LANE_F16 : WInst<"vst3_lane", "v*(3!)I", "hQh">; +def VST4_LANE_F16 : WInst<"vst4_lane", "v*(4!)I", "hQh">; } //////////////////////////////////////////////////////////////////////////////// // E.3.16 Extract lanes from a vector let InstName = "vmov" in -def VGET_LANE : IInst<"vget_lane", "sdi", +def VGET_LANE : IInst<"vget_lane", "1.I", "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl">; //////////////////////////////////////////////////////////////////////////////// // E.3.17 Set lanes within a vector let InstName = "vmov" in -def VSET_LANE : IInst<"vset_lane", "dsdi", +def VSET_LANE : IInst<"vset_lane", ".1.I", "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl">; //////////////////////////////////////////////////////////////////////////////// // E.3.18 Initialize a vector from bit pattern -def VCREATE : NoTestOpInst<"vcreate", "dl", "csihfUcUsUiUlPcPsl", OP_CAST> { +def VCREATE : NoTestOpInst<"vcreate", ".(IU>)", "csihfUcUsUiUlPcPsl", OP_CAST> { let BigEndianSafe = 1; } //////////////////////////////////////////////////////////////////////////////// // E.3.19 Set all lanes to same value let InstName = "vmov" in { -def VDUP_N : WOpInst<"vdup_n", "ds", +def VDUP_N : WOpInst<"vdup_n", ".1", "UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl", OP_DUP>; -def VMOV_N : 
WOpInst<"vmov_n", "ds",
+def VMOV_N : WOpInst<"vmov_n", ".1",
 "UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl", OP_DUP>;
 }
 let InstName = "" in
-def VDUP_LANE: WOpInst<"vdup_lane", "dgi",
+def VDUP_LANE: WOpInst<"vdup_lane", ".qI",
 "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl", OP_DUP_LN>;

////////////////////////////////////////////////////////////////////////////////
// E.3.20 Combining vectors
-def VCOMBINE : NoTestOpInst<"vcombine", "kdd", "csilhfUcUsUiUlPcPs", OP_CONC>;
+def VCOMBINE : NoTestOpInst<"vcombine", "Q..", "csilhfUcUsUiUlPcPs", OP_CONC>;

////////////////////////////////////////////////////////////////////////////////
// E.3.21 Splitting vectors
@@ -468,127 +469,127 @@ def VCOMBINE : NoTestOpInst<"vcombine", "kdd", "csilhfUcUsUiUlPcPs", OP_CONC>;
// versions of these intrinsics in both AArch32 and AArch64 architectures. See
// D45668 for more details.
let InstName = "vmov" in {
-def VGET_HIGH : NoTestOpInst<"vget_high", "dk", "csilhfUcUsUiUlPcPs", OP_HI>;
-def VGET_LOW : NoTestOpInst<"vget_low", "dk", "csilhfUcUsUiUlPcPs", OP_LO>;
+def VGET_HIGH : NoTestOpInst<"vget_high", ".Q", "csilhfUcUsUiUlPcPs", OP_HI>;
+def VGET_LOW : NoTestOpInst<"vget_low", ".Q", "csilhfUcUsUiUlPcPs", OP_LO>;
 }

////////////////////////////////////////////////////////////////////////////////
// E.3.22 Converting vectors
 let ArchGuard = "(__ARM_FP & 2)" in {
- def VCVT_F16_F32 : SInst<"vcvt_f16_f32", "md", "Hf">;
- def VCVT_F32_F16 : SInst<"vcvt_f32_f16", "wd", "h">;
+ def VCVT_F16_F32 : SInst<"vcvt_f16_f32", "(<Q)(.!)", "Hf">;
+ def VCVT_F32_F16 : SInst<"vcvt_f32_f16", "(>Q)(.!)", "h">;
 }
-def VCVT_S32 : SInst<"vcvt_s32", "xd", "fQf">;
-def VCVT_U32 : SInst<"vcvt_u32", "ud", "fQf">;
-def VCVT_F32 : SInst<"vcvt_f32", "fd", "iUiQiQUi">;
+def VCVT_S32 : SInst<"vcvt_s32", "S.", "fQf">;
+def VCVT_U32 : SInst<"vcvt_u32", "U.", "fQf">;
+def VCVT_F32 : SInst<"vcvt_f32", "F(.!)", "iUiQiQUi">;
 let isVCVT_N = 1 in {
-def VCVT_N_S32 : SInst<"vcvt_n_s32", "xdi", "fQf">;
-def VCVT_N_U32 :
SInst<"vcvt_n_u32", "udi", "fQf">;
-def VCVT_N_F32 : SInst<"vcvt_n_f32", "fdi", "iUiQiQUi">;
+def VCVT_N_S32 : SInst<"vcvt_n_s32", "S.I", "fQf">;
+def VCVT_N_U32 : SInst<"vcvt_n_u32", "U.I", "fQf">;
+def VCVT_N_F32 : SInst<"vcvt_n_f32", "F(.!)I", "iUiQiQUi">;
 }

-def VMOVN : IInst<"vmovn", "hk", "silUsUiUl">;
-def VMOVL : SInst<"vmovl", "wd", "csiUcUsUi">;
-def VQMOVN : SInst<"vqmovn", "hk", "silUsUiUl">;
-def VQMOVUN : SInst<"vqmovun", "ek", "sil">;
+def VMOVN : IInst<"vmovn", "<Q", "silUsUiUl">;
+def VMOVL : SInst<"vmovl", "(>Q).", "csiUcUsUi">;
+def VQMOVN : SInst<"vqmovn", "<Q", "silUsUiUl">;
+def VQMOVUN : SInst<"vqmovun", "(<U)Q", "sil">;

////////////////////////////////////////////////////////////////////////////////
// E.3.23-24 Table lookup, Extended table lookup
 let InstName = "vtbl" in {
-def VTBL1 : WInst<"vtbl1", "ddt", "UccPc">;
-def VTBL2 : WInst<"vtbl2", "d2t", "UccPc">;
-def VTBL3 : WInst<"vtbl3", "d3t", "UccPc">;
-def VTBL4 : WInst<"vtbl4", "d4t", "UccPc">;
+def VTBL1 : WInst<"vtbl1", "..p", "UccPc">;
+def VTBL2 : WInst<"vtbl2", ".2p", "UccPc">;
+def VTBL3 : WInst<"vtbl3", ".3p", "UccPc">;
+def VTBL4 : WInst<"vtbl4", ".4p", "UccPc">;
 }
 let InstName = "vtbx" in {
-def VTBX1 : WInst<"vtbx1", "dddt", "UccPc">;
-def VTBX2 : WInst<"vtbx2", "dd2t", "UccPc">;
-def VTBX3 : WInst<"vtbx3", "dd3t", "UccPc">;
-def VTBX4 : WInst<"vtbx4", "dd4t", "UccPc">;
+def VTBX1 : WInst<"vtbx1", "...p", "UccPc">;
+def VTBX2 : WInst<"vtbx2", "..2p", "UccPc">;
+def VTBX3 : WInst<"vtbx3", "..3p", "UccPc">;
+def VTBX4 : WInst<"vtbx4", "..4p", "UccPc">;
 }

////////////////////////////////////////////////////////////////////////////////
// E.3.25 Operations with a scalar value
-def VMLA_LANE : IOpInst<"vmla_lane", "dddgi",
+def VMLA_LANE : IOpInst<"vmla_lane", "...qI",
 "siUsUifQsQiQUsQUiQf", OP_MLA_LN>;
-def VMLAL_LANE : SOpInst<"vmlal_lane", "wwddi", "siUsUi", OP_MLAL_LN>;
-def VQDMLAL_LANE : SOpInst<"vqdmlal_lane", "wwddi", "si", OP_QDMLAL_LN>;
-def VMLS_LANE : IOpInst<"vmls_lane", "dddgi",
+def VMLAL_LANE :
SOpInst<"vmlal_lane", "(>Q)(>Q)..I", "siUsUi", OP_MLAL_LN>; +def VQDMLAL_LANE : SOpInst<"vqdmlal_lane", "(>Q)(>Q)..I", "si", OP_QDMLAL_LN>; +def VMLS_LANE : IOpInst<"vmls_lane", "...qI", "siUsUifQsQiQUsQUiQf", OP_MLS_LN>; -def VMLSL_LANE : SOpInst<"vmlsl_lane", "wwddi", "siUsUi", OP_MLSL_LN>; -def VQDMLSL_LANE : SOpInst<"vqdmlsl_lane", "wwddi", "si", OP_QDMLSL_LN>; -def VMUL_N : IOpInst<"vmul_n", "dds", "sifUsUiQsQiQfQUsQUi", OP_MUL_N>; -def VMUL_LANE : IOpInst<"vmul_lane", "ddgi", +def VMLSL_LANE : SOpInst<"vmlsl_lane", "(>Q)(>Q)..I", "siUsUi", OP_MLSL_LN>; +def VQDMLSL_LANE : SOpInst<"vqdmlsl_lane", "(>Q)(>Q)..I", "si", OP_QDMLSL_LN>; +def VMUL_N : IOpInst<"vmul_n", "..1", "sifUsUiQsQiQfQUsQUi", OP_MUL_N>; +def VMUL_LANE : IOpInst<"vmul_lane", "..qI", "sifUsUiQsQiQfQUsQUi", OP_MUL_LN>; -def VMULL_N : SOpInst<"vmull_n", "wds", "siUsUi", OP_MULL_N>; -def VMULL_LANE : SOpInst<"vmull_lane", "wddi", "siUsUi", OP_MULL_LN>; -def VQDMULL_N : SOpInst<"vqdmull_n", "wds", "si", OP_QDMULL_N>; -def VQDMULL_LANE : SOpInst<"vqdmull_lane", "wddi", "si", OP_QDMULL_LN>; -def VQDMULH_N : SOpInst<"vqdmulh_n", "dds", "siQsQi", OP_QDMULH_N>; -def VQDMULH_LANE : SOpInst<"vqdmulh_lane", "ddgi", "siQsQi", OP_QDMULH_LN>; -def VQRDMULH_N : SOpInst<"vqrdmulh_n", "dds", "siQsQi", OP_QRDMULH_N>; -def VQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "ddgi", "siQsQi", OP_QRDMULH_LN>; +def VMULL_N : SOpInst<"vmull_n", "(>Q).1", "siUsUi", OP_MULL_N>; +def VMULL_LANE : SOpInst<"vmull_lane", "(>Q)..I", "siUsUi", OP_MULL_LN>; +def VQDMULL_N : SOpInst<"vqdmull_n", "(>Q).1", "si", OP_QDMULL_N>; +def VQDMULL_LANE : SOpInst<"vqdmull_lane", "(>Q)..I", "si", OP_QDMULL_LN>; +def VQDMULH_N : SOpInst<"vqdmulh_n", "..1", "siQsQi", OP_QDMULH_N>; +def VQDMULH_LANE : SOpInst<"vqdmulh_lane", "..qI", "siQsQi", OP_QDMULH_LN>; +def VQRDMULH_N : SOpInst<"vqrdmulh_n", "..1", "siQsQi", OP_QRDMULH_N>; +def VQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "..qI", "siQsQi", OP_QRDMULH_LN>; let ArchGuard = "defined(__ARM_FEATURE_QRDMX)" 
in { -def VQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "dddgi", "siQsQi", OP_QRDMLAH_LN>; -def VQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "dddgi", "siQsQi", OP_QRDMLSH_LN>; +def VQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "...qI", "siQsQi", OP_QRDMLAH_LN>; +def VQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "...qI", "siQsQi", OP_QRDMLSH_LN>; } -def VMLA_N : IOpInst<"vmla_n", "ddds", "siUsUifQsQiQUsQUiQf", OP_MLA_N>; -def VMLAL_N : SOpInst<"vmlal_n", "wwds", "siUsUi", OP_MLAL_N>; -def VQDMLAL_N : SOpInst<"vqdmlal_n", "wwds", "si", OP_QDMLAL_N>; -def VMLS_N : IOpInst<"vmls_n", "ddds", "siUsUifQsQiQUsQUiQf", OP_MLS_N>; -def VMLSL_N : SOpInst<"vmlsl_n", "wwds", "siUsUi", OP_MLSL_N>; -def VQDMLSL_N : SOpInst<"vqdmlsl_n", "wwds", "si", OP_QDMLSL_N>; +def VMLA_N : IOpInst<"vmla_n", "...1", "siUsUifQsQiQUsQUiQf", OP_MLA_N>; +def VMLAL_N : SOpInst<"vmlal_n", "(>Q)(>Q).1", "siUsUi", OP_MLAL_N>; +def VQDMLAL_N : SOpInst<"vqdmlal_n", "(>Q)(>Q).1", "si", OP_QDMLAL_N>; +def VMLS_N : IOpInst<"vmls_n", "...1", "siUsUifQsQiQUsQUiQf", OP_MLS_N>; +def VMLSL_N : SOpInst<"vmlsl_n", "(>Q)(>Q).1", "siUsUi", OP_MLSL_N>; +def VQDMLSL_N : SOpInst<"vqdmlsl_n", "(>Q)(>Q).1", "si", OP_QDMLSL_N>; //////////////////////////////////////////////////////////////////////////////// // E.3.26 Vector Extract -def VEXT : WInst<"vext", "dddi", +def VEXT : WInst<"vext", "...I", "cUcPcsUsPsiUilUlfQcQUcQPcQsQUsQPsQiQUiQlQUlQf">; //////////////////////////////////////////////////////////////////////////////// // E.3.27 Reverse vector elements -def VREV64 : WOpInst<"vrev64", "dd", "csiUcUsUiPcPsfQcQsQiQUcQUsQUiQPcQPsQf", +def VREV64 : WOpInst<"vrev64", "..", "csiUcUsUiPcPsfQcQsQiQUcQUsQUiQPcQPsQf", OP_REV64>; -def VREV32 : WOpInst<"vrev32", "dd", "csUcUsPcPsQcQsQUcQUsQPcQPs", OP_REV32>; -def VREV16 : WOpInst<"vrev16", "dd", "cUcPcQcQUcQPc", OP_REV16>; +def VREV32 : WOpInst<"vrev32", "..", "csUcUsPcPsQcQsQUcQUsQPcQPs", OP_REV32>; +def VREV16 : WOpInst<"vrev16", "..", "cUcPcQcQUcQPc", OP_REV16>; 
//////////////////////////////////////////////////////////////////////////////// // E.3.28 Other single operand arithmetic -def VABS : SInst<"vabs", "dd", "csifQcQsQiQf">; -def VQABS : SInst<"vqabs", "dd", "csiQcQsQi">; -def VNEG : SOpInst<"vneg", "dd", "csifQcQsQiQf", OP_NEG>; -def VQNEG : SInst<"vqneg", "dd", "csiQcQsQi">; -def VCLS : SInst<"vcls", "dd", "csiQcQsQi">; -def VCLZ : IInst<"vclz", "dd", "csiUcUsUiQcQsQiQUcQUsQUi">; -def VCNT : WInst<"vcnt", "dd", "UccPcQUcQcQPc">; -def VRECPE : SInst<"vrecpe", "dd", "fUiQfQUi">; -def VRSQRTE : SInst<"vrsqrte", "dd", "fUiQfQUi">; +def VABS : SInst<"vabs", "..", "csifQcQsQiQf">; +def VQABS : SInst<"vqabs", "..", "csiQcQsQi">; +def VNEG : SOpInst<"vneg", "..", "csifQcQsQiQf", OP_NEG>; +def VQNEG : SInst<"vqneg", "..", "csiQcQsQi">; +def VCLS : SInst<"vcls", "..", "csiQcQsQi">; +def VCLZ : IInst<"vclz", "..", "csiUcUsUiQcQsQiQUcQUsQUi">; +def VCNT : WInst<"vcnt", "..", "UccPcQUcQcQPc">; +def VRECPE : SInst<"vrecpe", "..", "fUiQfQUi">; +def VRSQRTE : SInst<"vrsqrte", "..", "fUiQfQUi">; //////////////////////////////////////////////////////////////////////////////// // E.3.29 Logical operations -def VMVN : LOpInst<"vmvn", "dd", "csiUcUsUiPcQcQsQiQUcQUsQUiQPc", OP_NOT>; -def VAND : LOpInst<"vand", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_AND>; -def VORR : LOpInst<"vorr", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_OR>; -def VEOR : LOpInst<"veor", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_XOR>; -def VBIC : LOpInst<"vbic", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ANDN>; -def VORN : LOpInst<"vorn", "ddd", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ORN>; +def VMVN : LOpInst<"vmvn", "..", "csiUcUsUiPcQcQsQiQUcQUsQUiQPc", OP_NOT>; +def VAND : LOpInst<"vand", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_AND>; +def VORR : LOpInst<"vorr", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_OR>; +def VEOR : LOpInst<"veor", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_XOR>; +def VBIC : LOpInst<"vbic", "...", 
"csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ANDN>; +def VORN : LOpInst<"vorn", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ORN>; let isHiddenLInst = 1 in -def VBSL : SInst<"vbsl", "dudd", +def VBSL : SInst<"vbsl", ".U..", "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPs">; //////////////////////////////////////////////////////////////////////////////// // E.3.30 Transposition operations -def VTRN : WInst<"vtrn", "2dd", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; -def VZIP : WInst<"vzip", "2dd", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; -def VUZP : WInst<"vuzp", "2dd", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; +def VTRN : WInst<"vtrn", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; +def VZIP : WInst<"vzip", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; +def VUZP : WInst<"vuzp", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; //////////////////////////////////////////////////////////////////////////////// // E.3.31 Vector reinterpret cast operations def VREINTERPRET - : NoTestOpInst<"vreinterpret", "dd", + : NoTestOpInst<"vreinterpret", "..", "csilUcUsUiUlhfPcPsQcQsQiQlQUcQUsQUiQUlQhQfQPcQPs", OP_REINT> { let CartesianProductOfTypes = 1; let ArchGuard = "!defined(__aarch64__)"; @@ -599,17 +600,17 @@ def VREINTERPRET // Vector fused multiply-add operations let ArchGuard = "defined(__ARM_FEATURE_FMA)" in { - def VFMA : SInst<"vfma", "dddd", "fQf">; - def VFMS : SOpInst<"vfms", "dddd", "fQf", OP_FMLS>; - def FMLA_N_F32 : SOpInst<"vfma_n", "ddds", "fQf", OP_FMLA_N>; + def VFMA : SInst<"vfma", "....", "fQf">; + def VFMS : SOpInst<"vfms", "....", "fQf", OP_FMLS>; + def FMLA_N_F32 : SOpInst<"vfma_n", "...1", "fQf", OP_FMLA_N>; } //////////////////////////////////////////////////////////////////////////////// // fp16 vector operations -def SCALAR_HALF_GET_LANE : IOpInst<"vget_lane", "sdi", "h", OP_SCALAR_HALF_GET_LN>; -def SCALAR_HALF_SET_LANE : IOpInst<"vset_lane", "dsdi", "h", OP_SCALAR_HALF_SET_LN>; -def SCALAR_HALF_GET_LANEQ : IOpInst<"vget_lane", "sdi", "Qh", 
OP_SCALAR_HALF_GET_LNQ>; -def SCALAR_HALF_SET_LANEQ : IOpInst<"vset_lane", "dsdi", "Qh", OP_SCALAR_HALF_SET_LNQ>; +def SCALAR_HALF_GET_LANE : IOpInst<"vget_lane", "1.I", "h", OP_SCALAR_HALF_GET_LN>; +def SCALAR_HALF_SET_LANE : IOpInst<"vset_lane", ".1.I", "h", OP_SCALAR_HALF_SET_LN>; +def SCALAR_HALF_GET_LANEQ : IOpInst<"vget_lane", "1.I", "Qh", OP_SCALAR_HALF_GET_LNQ>; +def SCALAR_HALF_SET_LANEQ : IOpInst<"vset_lane", ".1.I", "Qh", OP_SCALAR_HALF_SET_LNQ>; //////////////////////////////////////////////////////////////////////////////// // AArch64 Intrinsics @@ -618,474 +619,474 @@ let ArchGuard = "defined(__aarch64__)" in { //////////////////////////////////////////////////////////////////////////////// // Load/Store -def LD1 : WInst<"vld1", "dc", "dQdPlQPl">; -def LD2 : WInst<"vld2", "2c", "QUlQldQdPlQPl">; -def LD3 : WInst<"vld3", "3c", "QUlQldQdPlQPl">; -def LD4 : WInst<"vld4", "4c", "QUlQldQdPlQPl">; -def ST1 : WInst<"vst1", "vpd", "dQdPlQPl">; -def ST2 : WInst<"vst2", "vp2", "QUlQldQdPlQPl">; -def ST3 : WInst<"vst3", "vp3", "QUlQldQdPlQPl">; -def ST4 : WInst<"vst4", "vp4", "QUlQldQdPlQPl">; - -def LD1_X2 : WInst<"vld1_x2", "2c", +def LD1 : WInst<"vld1", ".(c*!)", "dQdPlQPl">; +def LD2 : WInst<"vld2", "2(c*!)", "QUlQldQdPlQPl">; +def LD3 : WInst<"vld3", "3(c*!)", "QUlQldQdPlQPl">; +def LD4 : WInst<"vld4", "4(c*!)", "QUlQldQdPlQPl">; +def ST1 : WInst<"vst1", "v*(.!)", "dQdPlQPl">; +def ST2 : WInst<"vst2", "v*(2!)", "QUlQldQdPlQPl">; +def ST3 : WInst<"vst3", "v*(3!)", "QUlQldQdPlQPl">; +def ST4 : WInst<"vst4", "v*(4!)", "QUlQldQdPlQPl">; + +def LD1_X2 : WInst<"vld1_x2", "2(c*!)", "dQdPlQPl">; -def LD1_X3 : WInst<"vld1_x3", "3c", +def LD1_X3 : WInst<"vld1_x3", "3(c*!)", "dQdPlQPl">; -def LD1_X4 : WInst<"vld1_x4", "4c", +def LD1_X4 : WInst<"vld1_x4", "4(c*!)", "dQdPlQPl">; -def ST1_X2 : WInst<"vst1_x2", "vp2", "dQdPlQPl">; -def ST1_X3 : WInst<"vst1_x3", "vp3", "dQdPlQPl">; -def ST1_X4 : WInst<"vst1_x4", "vp4", "dQdPlQPl">; +def ST1_X2 : WInst<"vst1_x2", "v*(2!)", 
"dQdPlQPl">; +def ST1_X3 : WInst<"vst1_x3", "v*(3!)", "dQdPlQPl">; +def ST1_X4 : WInst<"vst1_x4", "v*(4!)", "dQdPlQPl">; -def LD1_LANE : WInst<"vld1_lane", "dcdi", "dQdPlQPl">; -def LD2_LANE : WInst<"vld2_lane", "2c2i", "lUlQcQUcQPcQlQUldQdPlQPl">; -def LD3_LANE : WInst<"vld3_lane", "3c3i", "lUlQcQUcQPcQlQUldQdPlQPl">; -def LD4_LANE : WInst<"vld4_lane", "4c4i", "lUlQcQUcQPcQlQUldQdPlQPl">; -def ST1_LANE : WInst<"vst1_lane", "vpdi", "dQdPlQPl">; -def ST2_LANE : WInst<"vst2_lane", "vp2i", "lUlQcQUcQPcQlQUldQdPlQPl">; -def ST3_LANE : WInst<"vst3_lane", "vp3i", "lUlQcQUcQPcQlQUldQdPlQPl">; -def ST4_LANE : WInst<"vst4_lane", "vp4i", "lUlQcQUcQPcQlQUldQdPlQPl">; +def LD1_LANE : WInst<"vld1_lane", ".(c*!).I", "dQdPlQPl">; +def LD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "lUlQcQUcQPcQlQUldQdPlQPl">; +def LD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "lUlQcQUcQPcQlQUldQdPlQPl">; +def LD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "lUlQcQUcQPcQlQUldQdPlQPl">; +def ST1_LANE : WInst<"vst1_lane", "v*(.!)I", "dQdPlQPl">; +def ST2_LANE : WInst<"vst2_lane", "v*(2!)I", "lUlQcQUcQPcQlQUldQdPlQPl">; +def ST3_LANE : WInst<"vst3_lane", "v*(3!)I", "lUlQcQUcQPcQlQUldQdPlQPl">; +def ST4_LANE : WInst<"vst4_lane", "v*(4!)I", "lUlQcQUcQPcQlQUldQdPlQPl">; -def LD1_DUP : WInst<"vld1_dup", "dc", "dQdPlQPl">; -def LD2_DUP : WInst<"vld2_dup", "2c", "dQdPlQPl">; -def LD3_DUP : WInst<"vld3_dup", "3c", "dQdPlQPl">; -def LD4_DUP : WInst<"vld4_dup", "4c", "dQdPlQPl">; +def LD1_DUP : WInst<"vld1_dup", ".(c*!)", "dQdPlQPl">; +def LD2_DUP : WInst<"vld2_dup", "2(c*!)", "dQdPlQPl">; +def LD3_DUP : WInst<"vld3_dup", "3(c*!)", "dQdPlQPl">; +def LD4_DUP : WInst<"vld4_dup", "4(c*!)", "dQdPlQPl">; -def VLDRQ : WInst<"vldrq", "sc", "Pk">; -def VSTRQ : WInst<"vstrq", "vps", "Pk">; +def VLDRQ : WInst<"vldrq", "1(c*!)", "Pk">; +def VSTRQ : WInst<"vstrq", "v*(1!)", "Pk">; //////////////////////////////////////////////////////////////////////////////// // Addition -def ADD : IOpInst<"vadd", "ddd", "dQd", OP_ADD>; +def ADD : 
IOpInst<"vadd", "...", "dQd", OP_ADD>; //////////////////////////////////////////////////////////////////////////////// // Subtraction -def SUB : IOpInst<"vsub", "ddd", "dQd", OP_SUB>; +def SUB : IOpInst<"vsub", "...", "dQd", OP_SUB>; //////////////////////////////////////////////////////////////////////////////// // Multiplication -def MUL : IOpInst<"vmul", "ddd", "dQd", OP_MUL>; -def MLA : IOpInst<"vmla", "dddd", "dQd", OP_MLA>; -def MLS : IOpInst<"vmls", "dddd", "dQd", OP_MLS>; +def MUL : IOpInst<"vmul", "...", "dQd", OP_MUL>; +def MLA : IOpInst<"vmla", "....", "dQd", OP_MLA>; +def MLS : IOpInst<"vmls", "....", "dQd", OP_MLS>; //////////////////////////////////////////////////////////////////////////////// // Multiplication Extended -def MULX : SInst<"vmulx", "ddd", "fdQfQd">; +def MULX : SInst<"vmulx", "...", "fdQfQd">; //////////////////////////////////////////////////////////////////////////////// // Division -def FDIV : IOpInst<"vdiv", "ddd", "fdQfQd", OP_DIV>; +def FDIV : IOpInst<"vdiv", "...", "fdQfQd", OP_DIV>; //////////////////////////////////////////////////////////////////////////////// // Vector fused multiply-add operations -def FMLA : SInst<"vfma", "dddd", "dQd">; -def FMLS : SOpInst<"vfms", "dddd", "dQd", OP_FMLS>; +def FMLA : SInst<"vfma", "....", "dQd">; +def FMLS : SOpInst<"vfms", "....", "dQd", OP_FMLS>; //////////////////////////////////////////////////////////////////////////////// // MUL, MLA, MLS, FMA, FMS definitions with scalar argument -def VMUL_N_A64 : IOpInst<"vmul_n", "dds", "Qd", OP_MUL_N>; +def VMUL_N_A64 : IOpInst<"vmul_n", "..1", "Qd", OP_MUL_N>; -def FMLA_N : SOpInst<"vfma_n", "ddds", "dQd", OP_FMLA_N>; -def FMLS_N : SOpInst<"vfms_n", "ddds", "fdQfQd", OP_FMLS_N>; +def FMLA_N : SOpInst<"vfma_n", "...1", "dQd", OP_FMLA_N>; +def FMLS_N : SOpInst<"vfms_n", "...1", "fdQfQd", OP_FMLS_N>; -def MLA_N : SOpInst<"vmla_n", "ddds", "Qd", OP_MLA_N>; -def MLS_N : SOpInst<"vmls_n", "ddds", "Qd", OP_MLS_N>; +def MLA_N : SOpInst<"vmla_n", 
"...1", "Qd", OP_MLA_N>; +def MLS_N : SOpInst<"vmls_n", "...1", "Qd", OP_MLS_N>; //////////////////////////////////////////////////////////////////////////////// // Logical operations -def BSL : SInst<"vbsl", "dudd", "dPlQdQPl">; +def BSL : SInst<"vbsl", ".U..", "dPlQdQPl">; //////////////////////////////////////////////////////////////////////////////// // Absolute Difference -def ABD : SInst<"vabd", "ddd", "dQd">; +def ABD : SInst<"vabd", "...", "dQd">; //////////////////////////////////////////////////////////////////////////////// // saturating absolute/negate -def ABS : SInst<"vabs", "dd", "dQdlQl">; -def QABS : SInst<"vqabs", "dd", "lQl">; -def NEG : SOpInst<"vneg", "dd", "dlQdQl", OP_NEG>; -def QNEG : SInst<"vqneg", "dd", "lQl">; +def ABS : SInst<"vabs", "..", "dQdlQl">; +def QABS : SInst<"vqabs", "..", "lQl">; +def NEG : SOpInst<"vneg", "..", "dlQdQl", OP_NEG>; +def QNEG : SInst<"vqneg", "..", "lQl">; //////////////////////////////////////////////////////////////////////////////// // Signed Saturating Accumulated of Unsigned Value -def SUQADD : SInst<"vuqadd", "ddu", "csilQcQsQiQl">; +def SUQADD : SInst<"vuqadd", "..U", "csilQcQsQiQl">; //////////////////////////////////////////////////////////////////////////////// // Unsigned Saturating Accumulated of Signed Value -def USQADD : SInst<"vsqadd", "ddx", "UcUsUiUlQUcQUsQUiQUl">; +def USQADD : SInst<"vsqadd", "..S", "UcUsUiUlQUcQUsQUiQUl">; //////////////////////////////////////////////////////////////////////////////// // Reciprocal/Sqrt -def FRECPS : IInst<"vrecps", "ddd", "dQd">; -def FRSQRTS : IInst<"vrsqrts", "ddd", "dQd">; -def FRECPE : SInst<"vrecpe", "dd", "dQd">; -def FRSQRTE : SInst<"vrsqrte", "dd", "dQd">; -def FSQRT : SInst<"vsqrt", "dd", "fdQfQd">; +def FRECPS : IInst<"vrecps", "...", "dQd">; +def FRSQRTS : IInst<"vrsqrts", "...", "dQd">; +def FRECPE : SInst<"vrecpe", "..", "dQd">; +def FRSQRTE : SInst<"vrsqrte", "..", "dQd">; +def FSQRT : SInst<"vsqrt", "..", "fdQfQd">; 
//////////////////////////////////////////////////////////////////////////////// // bitwise reverse -def RBIT : IInst<"vrbit", "dd", "cUcPcQcQUcQPc">; +def RBIT : IInst<"vrbit", "..", "cUcPcQcQUcQPc">; //////////////////////////////////////////////////////////////////////////////// // Integer extract and narrow to high -def XTN2 : SOpInst<"vmovn_high", "qhk", "silUsUiUl", OP_XTN>; +def XTN2 : SOpInst<"vmovn_high", "(; //////////////////////////////////////////////////////////////////////////////// // Signed integer saturating extract and unsigned narrow to high -def SQXTUN2 : SOpInst<"vqmovun_high", "emd", "HsHiHl", OP_SQXTUN>; +def SQXTUN2 : SOpInst<"vqmovun_high", "(; //////////////////////////////////////////////////////////////////////////////// // Integer saturating extract and narrow to high -def QXTN2 : SOpInst<"vqmovn_high", "qhk", "silUsUiUl", OP_QXTN>; +def QXTN2 : SOpInst<"vqmovn_high", "(; //////////////////////////////////////////////////////////////////////////////// // Converting vectors -def VCVT_F32_F64 : SInst<"vcvt_f32_f64", "md", "Qd">; -def VCVT_F64_F32 : SInst<"vcvt_f64_f32", "wd", "f">; +def VCVT_F32_F64 : SInst<"vcvt_f32_f64", "(; +def VCVT_F64_F32 : SInst<"vcvt_f64_f32", "(>Q).", "f">; -def VCVT_S64 : SInst<"vcvt_s64", "xd", "dQd">; -def VCVT_U64 : SInst<"vcvt_u64", "ud", "dQd">; -def VCVT_F64 : SInst<"vcvt_f64", "Fd", "lUlQlQUl">; +def VCVT_S64 : SInst<"vcvt_s64", "S.", "dQd">; +def VCVT_U64 : SInst<"vcvt_u64", "U.", "dQd">; +def VCVT_F64 : SInst<"vcvt_f64", "F(.!)", "lUlQlQUl">; -def VCVT_HIGH_F16_F32 : SOpInst<"vcvt_high_f16", "hmj", "Hf", OP_VCVT_NA_HI_F16>; -def VCVT_HIGH_F32_F16 : SOpInst<"vcvt_high_f32", "wk", "h", OP_VCVT_EX_HI_F32>; -def VCVT_HIGH_F32_F64 : SOpInst<"vcvt_high_f32", "qfj", "d", OP_VCVT_NA_HI_F32>; -def VCVT_HIGH_F64_F32 : SOpInst<"vcvt_high_f64", "wj", "f", OP_VCVT_EX_HI_F64>; +def VCVT_HIGH_F16_F32 : SOpInst<"vcvt_high_f16", "<(; +def VCVT_HIGH_F32_F16 : SOpInst<"vcvt_high_f32", "(>Q)(Q!)", "h", OP_VCVT_EX_HI_F32>; 
+def VCVT_HIGH_F32_F64 : SOpInst<"vcvt_high_f32", "(; +def VCVT_HIGH_F64_F32 : SOpInst<"vcvt_high_f64", "(>Q)(Q!)", "f", OP_VCVT_EX_HI_F64>; -def VCVTX_F32_F64 : SInst<"vcvtx_f32", "fj", "d">; -def VCVTX_HIGH_F32_F64 : SOpInst<"vcvtx_high_f32", "qfj", "d", OP_VCVTX_HI>; +def VCVTX_F32_F64 : SInst<"vcvtx_f32", "(F<)(Q!)", "d">; +def VCVTX_HIGH_F32_F64 : SOpInst<"vcvtx_high_f32", "(; //////////////////////////////////////////////////////////////////////////////// // Comparison -def FCAGE : IInst<"vcage", "udd", "dQd">; -def FCAGT : IInst<"vcagt", "udd", "dQd">; -def FCALE : IInst<"vcale", "udd", "dQd">; -def FCALT : IInst<"vcalt", "udd", "dQd">; -def CMTST : WInst<"vtst", "udd", "lUlPlQlQUlQPl">; -def CFMEQ : SOpInst<"vceq", "udd", "lUldQdQlQUlPlQPl", OP_EQ>; -def CFMGE : SOpInst<"vcge", "udd", "lUldQdQlQUl", OP_GE>; -def CFMLE : SOpInst<"vcle", "udd", "lUldQdQlQUl", OP_LE>; -def CFMGT : SOpInst<"vcgt", "udd", "lUldQdQlQUl", OP_GT>; -def CFMLT : SOpInst<"vclt", "udd", "lUldQdQlQUl", OP_LT>; - -def CMEQ : SInst<"vceqz", "ud", +def FCAGE : IInst<"vcage", "U..", "dQd">; +def FCAGT : IInst<"vcagt", "U..", "dQd">; +def FCALE : IInst<"vcale", "U..", "dQd">; +def FCALT : IInst<"vcalt", "U..", "dQd">; +def CMTST : WInst<"vtst", "U..", "lUlPlQlQUlQPl">; +def CFMEQ : SOpInst<"vceq", "U..", "lUldQdQlQUlPlQPl", OP_EQ>; +def CFMGE : SOpInst<"vcge", "U..", "lUldQdQlQUl", OP_GE>; +def CFMLE : SOpInst<"vcle", "U..", "lUldQdQlQUl", OP_LE>; +def CFMGT : SOpInst<"vcgt", "U..", "lUldQdQlQUl", OP_GT>; +def CFMLT : SOpInst<"vclt", "U..", "lUldQdQlQUl", OP_LT>; + +def CMEQ : SInst<"vceqz", "U.", "csilfUcUsUiUlPcPsPlQcQsQiQlQfQUcQUsQUiQUlQPcQPsdQdQPl">; -def CMGE : SInst<"vcgez", "ud", "csilfdQcQsQiQlQfQd">; -def CMLE : SInst<"vclez", "ud", "csilfdQcQsQiQlQfQd">; -def CMGT : SInst<"vcgtz", "ud", "csilfdQcQsQiQlQfQd">; -def CMLT : SInst<"vcltz", "ud", "csilfdQcQsQiQlQfQd">; +def CMGE : SInst<"vcgez", "U.", "csilfdQcQsQiQlQfQd">; +def CMLE : SInst<"vclez", "U.", "csilfdQcQsQiQlQfQd">; +def 
CMGT : SInst<"vcgtz", "U.", "csilfdQcQsQiQlQfQd">; +def CMLT : SInst<"vcltz", "U.", "csilfdQcQsQiQlQfQd">; //////////////////////////////////////////////////////////////////////////////// // Max/Min Integer -def MAX : SInst<"vmax", "ddd", "dQd">; -def MIN : SInst<"vmin", "ddd", "dQd">; +def MAX : SInst<"vmax", "...", "dQd">; +def MIN : SInst<"vmin", "...", "dQd">; //////////////////////////////////////////////////////////////////////////////// // Pairwise Max/Min -def MAXP : SInst<"vpmax", "ddd", "QcQsQiQUcQUsQUiQfQd">; -def MINP : SInst<"vpmin", "ddd", "QcQsQiQUcQUsQUiQfQd">; +def MAXP : SInst<"vpmax", "...", "QcQsQiQUcQUsQUiQfQd">; +def MINP : SInst<"vpmin", "...", "QcQsQiQUcQUsQUiQfQd">; //////////////////////////////////////////////////////////////////////////////// // Pairwise MaxNum/MinNum Floating Point -def FMAXNMP : SInst<"vpmaxnm", "ddd", "fQfQd">; -def FMINNMP : SInst<"vpminnm", "ddd", "fQfQd">; +def FMAXNMP : SInst<"vpmaxnm", "...", "fQfQd">; +def FMINNMP : SInst<"vpminnm", "...", "fQfQd">; //////////////////////////////////////////////////////////////////////////////// // Pairwise Addition -def ADDP : IInst<"vpadd", "ddd", "QcQsQiQlQUcQUsQUiQUlQfQd">; +def ADDP : IInst<"vpadd", "...", "QcQsQiQlQUcQUsQUiQUlQfQd">; //////////////////////////////////////////////////////////////////////////////// // Shifts by constant let isShift = 1 in { // Left shift long high -def SHLL_HIGH_N : SOpInst<"vshll_high_n", "ndi", "HcHsHiHUcHUsHUi", +def SHLL_HIGH_N : SOpInst<"vshll_high_n", ">.I", "HcHsHiHUcHUsHUi", OP_LONG_HI>; //////////////////////////////////////////////////////////////////////////////// -def SRI_N : WInst<"vsri_n", "dddi", "PlQPl">; -def SLI_N : WInst<"vsli_n", "dddi", "PlQPl">; +def SRI_N : WInst<"vsri_n", "...I", "PlQPl">; +def SLI_N : WInst<"vsli_n", "...I", "PlQPl">; // Right shift narrow high -def SHRN_HIGH_N : IOpInst<"vshrn_high_n", "hmdi", +def SHRN_HIGH_N : IOpInst<"vshrn_high_n", "<(; -def QSHRUN_HIGH_N : SOpInst<"vqshrun_high_n", "hmdi", +def 
QSHRUN_HIGH_N : SOpInst<"vqshrun_high_n", "<(; -def RSHRN_HIGH_N : IOpInst<"vrshrn_high_n", "hmdi", +def RSHRN_HIGH_N : IOpInst<"vrshrn_high_n", "<(; -def QRSHRUN_HIGH_N : SOpInst<"vqrshrun_high_n", "hmdi", +def QRSHRUN_HIGH_N : SOpInst<"vqrshrun_high_n", "<(; -def QSHRN_HIGH_N : SOpInst<"vqshrn_high_n", "hmdi", +def QSHRN_HIGH_N : SOpInst<"vqshrn_high_n", "<(; -def QRSHRN_HIGH_N : SOpInst<"vqrshrn_high_n", "hmdi", +def QRSHRN_HIGH_N : SOpInst<"vqrshrn_high_n", "<(; } //////////////////////////////////////////////////////////////////////////////// // Converting vectors -def VMOVL_HIGH : SOpInst<"vmovl_high", "nd", "HcHsHiHUcHUsHUi", OP_MOVL_HI>; +def VMOVL_HIGH : SOpInst<"vmovl_high", ">.", "HcHsHiHUcHUsHUi", OP_MOVL_HI>; let isVCVT_N = 1 in { -def CVTF_N_F64 : SInst<"vcvt_n_f64", "Fdi", "lUlQlQUl">; -def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "xdi", "dQd">; -def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "udi", "dQd">; +def CVTF_N_F64 : SInst<"vcvt_n_f64", "F(.!)I", "lUlQlQUl">; +def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "S.I", "dQd">; +def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "U.I", "dQd">; } //////////////////////////////////////////////////////////////////////////////// // 3VDiff class using high 64-bit in operands -def VADDL_HIGH : SOpInst<"vaddl_high", "wkk", "csiUcUsUi", OP_ADDLHi>; -def VADDW_HIGH : SOpInst<"vaddw_high", "wwk", "csiUcUsUi", OP_ADDWHi>; -def VSUBL_HIGH : SOpInst<"vsubl_high", "wkk", "csiUcUsUi", OP_SUBLHi>; -def VSUBW_HIGH : SOpInst<"vsubw_high", "wwk", "csiUcUsUi", OP_SUBWHi>; +def VADDL_HIGH : SOpInst<"vaddl_high", "(>Q)QQ", "csiUcUsUi", OP_ADDLHi>; +def VADDW_HIGH : SOpInst<"vaddw_high", "(>Q)(>Q)Q", "csiUcUsUi", OP_ADDWHi>; +def VSUBL_HIGH : SOpInst<"vsubl_high", "(>Q)QQ", "csiUcUsUi", OP_SUBLHi>; +def VSUBW_HIGH : SOpInst<"vsubw_high", "(>Q)(>Q)Q", "csiUcUsUi", OP_SUBWHi>; -def VABDL_HIGH : SOpInst<"vabdl_high", "wkk", "csiUcUsUi", OP_ABDLHi>; -def VABAL_HIGH : SOpInst<"vabal_high", "wwkk", "csiUcUsUi", OP_ABALHi>; +def VABDL_HIGH : 
SOpInst<"vabdl_high", "(>Q)QQ", "csiUcUsUi", OP_ABDLHi>; +def VABAL_HIGH : SOpInst<"vabal_high", "(>Q)(>Q)QQ", "csiUcUsUi", OP_ABALHi>; -def VMULL_HIGH : SOpInst<"vmull_high", "wkk", "csiUcUsUiPc", OP_MULLHi>; -def VMULL_HIGH_N : SOpInst<"vmull_high_n", "wks", "siUsUi", OP_MULLHi_N>; -def VMLAL_HIGH : SOpInst<"vmlal_high", "wwkk", "csiUcUsUi", OP_MLALHi>; -def VMLAL_HIGH_N : SOpInst<"vmlal_high_n", "wwks", "siUsUi", OP_MLALHi_N>; -def VMLSL_HIGH : SOpInst<"vmlsl_high", "wwkk", "csiUcUsUi", OP_MLSLHi>; -def VMLSL_HIGH_N : SOpInst<"vmlsl_high_n", "wwks", "siUsUi", OP_MLSLHi_N>; +def VMULL_HIGH : SOpInst<"vmull_high", "(>Q)QQ", "csiUcUsUiPc", OP_MULLHi>; +def VMULL_HIGH_N : SOpInst<"vmull_high_n", "(>Q)Q1", "siUsUi", OP_MULLHi_N>; +def VMLAL_HIGH : SOpInst<"vmlal_high", "(>Q)(>Q)QQ", "csiUcUsUi", OP_MLALHi>; +def VMLAL_HIGH_N : SOpInst<"vmlal_high_n", "(>Q)(>Q)Q1", "siUsUi", OP_MLALHi_N>; +def VMLSL_HIGH : SOpInst<"vmlsl_high", "(>Q)(>Q)QQ", "csiUcUsUi", OP_MLSLHi>; +def VMLSL_HIGH_N : SOpInst<"vmlsl_high_n", "(>Q)(>Q)Q1", "siUsUi", OP_MLSLHi_N>; -def VADDHN_HIGH : SOpInst<"vaddhn_high", "qhkk", "silUsUiUl", OP_ADDHNHi>; -def VRADDHN_HIGH : SOpInst<"vraddhn_high", "qhkk", "silUsUiUl", OP_RADDHNHi>; -def VSUBHN_HIGH : SOpInst<"vsubhn_high", "qhkk", "silUsUiUl", OP_SUBHNHi>; -def VRSUBHN_HIGH : SOpInst<"vrsubhn_high", "qhkk", "silUsUiUl", OP_RSUBHNHi>; +def VADDHN_HIGH : SOpInst<"vaddhn_high", "(; +def VRADDHN_HIGH : SOpInst<"vraddhn_high", "(; +def VSUBHN_HIGH : SOpInst<"vsubhn_high", "(; +def VRSUBHN_HIGH : SOpInst<"vrsubhn_high", "(; -def VQDMULL_HIGH : SOpInst<"vqdmull_high", "wkk", "si", OP_QDMULLHi>; -def VQDMULL_HIGH_N : SOpInst<"vqdmull_high_n", "wks", "si", OP_QDMULLHi_N>; -def VQDMLAL_HIGH : SOpInst<"vqdmlal_high", "wwkk", "si", OP_QDMLALHi>; -def VQDMLAL_HIGH_N : SOpInst<"vqdmlal_high_n", "wwks", "si", OP_QDMLALHi_N>; -def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "wwkk", "si", OP_QDMLSLHi>; -def VQDMLSL_HIGH_N : SOpInst<"vqdmlsl_high_n", "wwks", "si", 
OP_QDMLSLHi_N>; -def VMULL_P64 : SInst<"vmull", "rss", "Pl">; -def VMULL_HIGH_P64 : SOpInst<"vmull_high", "rdd", "HPl", OP_MULLHi_P64>; +def VQDMULL_HIGH : SOpInst<"vqdmull_high", "(>Q)QQ", "si", OP_QDMULLHi>; +def VQDMULL_HIGH_N : SOpInst<"vqdmull_high_n", "(>Q)Q1", "si", OP_QDMULLHi_N>; +def VQDMLAL_HIGH : SOpInst<"vqdmlal_high", "(>Q)(>Q)QQ", "si", OP_QDMLALHi>; +def VQDMLAL_HIGH_N : SOpInst<"vqdmlal_high_n", "(>Q)(>Q)Q1", "si", OP_QDMLALHi_N>; +def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "(>Q)(>Q)QQ", "si", OP_QDMLSLHi>; +def VQDMLSL_HIGH_N : SOpInst<"vqdmlsl_high_n", "(>Q)(>Q)Q1", "si", OP_QDMLSLHi_N>; +def VMULL_P64 : SInst<"vmull", "(1>)11", "Pl">; +def VMULL_HIGH_P64 : SOpInst<"vmull_high", "(1>)..", "HPl", OP_MULLHi_P64>; //////////////////////////////////////////////////////////////////////////////// // Extract or insert element from vector -def GET_LANE : IInst<"vget_lane", "sdi", "dQdPlQPl">; -def SET_LANE : IInst<"vset_lane", "dsdi", "dQdPlQPl">; -def COPY_LANE : IOpInst<"vcopy_lane", "ddidi", +def GET_LANE : IInst<"vget_lane", "1.I", "dQdPlQPl">; +def SET_LANE : IInst<"vset_lane", ".1.I", "dQdPlQPl">; +def COPY_LANE : IOpInst<"vcopy_lane", "..I.I", "csilUcUsUiUlPcPsPlfd", OP_COPY_LN>; -def COPYQ_LANE : IOpInst<"vcopy_lane", "ddigi", +def COPYQ_LANE : IOpInst<"vcopy_lane", "..IqI", "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN>; -def COPY_LANEQ : IOpInst<"vcopy_laneq", "ddiki", +def COPY_LANEQ : IOpInst<"vcopy_laneq", "..IQI", "csilPcPsPlUcUsUiUlfd", OP_COPY_LN>; -def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "ddidi", +def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "..I.I", "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN>; //////////////////////////////////////////////////////////////////////////////// // Set all lanes to same value -def VDUP_LANE1: WOpInst<"vdup_lane", "dgi", "hdQhQdPlQPl", OP_DUP_LN>; -def VDUP_LANE2: WOpInst<"vdup_laneq", "dji", +def VDUP_LANE1: WOpInst<"vdup_lane", ".qI", "hdQhQdPlQPl", OP_DUP_LN>; +def VDUP_LANE2: WOpInst<"vdup_laneq", 
".QI", "csilUcUsUiUlPcPshfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl", OP_DUP_LN>; -def DUP_N : WOpInst<"vdup_n", "ds", "dQdPlQPl", OP_DUP>; -def MOV_N : WOpInst<"vmov_n", "ds", "dQdPlQPl", OP_DUP>; +def DUP_N : WOpInst<"vdup_n", ".1", "dQdPlQPl", OP_DUP>; +def MOV_N : WOpInst<"vmov_n", ".1", "dQdPlQPl", OP_DUP>; //////////////////////////////////////////////////////////////////////////////// -def COMBINE : NoTestOpInst<"vcombine", "kdd", "dPl", OP_CONC>; +def COMBINE : NoTestOpInst<"vcombine", "Q..", "dPl", OP_CONC>; //////////////////////////////////////////////////////////////////////////////// //Initialize a vector from bit pattern -def CREATE : NoTestOpInst<"vcreate", "dl", "dPl", OP_CAST> { +def CREATE : NoTestOpInst<"vcreate", ".(IU>)", "dPl", OP_CAST> { let BigEndianSafe = 1; } //////////////////////////////////////////////////////////////////////////////// -def VMLA_LANEQ : IOpInst<"vmla_laneq", "dddji", +def VMLA_LANEQ : IOpInst<"vmla_laneq", "...QI", "siUsUifQsQiQUsQUiQf", OP_MLA_LN>; -def VMLS_LANEQ : IOpInst<"vmls_laneq", "dddji", +def VMLS_LANEQ : IOpInst<"vmls_laneq", "...QI", "siUsUifQsQiQUsQUiQf", OP_MLS_LN>; -def VFMA_LANE : IInst<"vfma_lane", "dddgi", "fdQfQd">; -def VFMA_LANEQ : IInst<"vfma_laneq", "dddji", "fdQfQd"> { +def VFMA_LANE : IInst<"vfma_lane", "...qI", "fdQfQd">; +def VFMA_LANEQ : IInst<"vfma_laneq", "...QI", "fdQfQd"> { let isLaneQ = 1; } -def VFMS_LANE : IOpInst<"vfms_lane", "dddgi", "fdQfQd", OP_FMS_LN>; -def VFMS_LANEQ : IOpInst<"vfms_laneq", "dddji", "fdQfQd", OP_FMS_LNQ>; +def VFMS_LANE : IOpInst<"vfms_lane", "...qI", "fdQfQd", OP_FMS_LN>; +def VFMS_LANEQ : IOpInst<"vfms_laneq", "...QI", "fdQfQd", OP_FMS_LNQ>; -def VMLAL_LANEQ : SOpInst<"vmlal_laneq", "wwdki", "siUsUi", OP_MLAL_LN>; -def VMLAL_HIGH_LANE : SOpInst<"vmlal_high_lane", "wwkdi", "siUsUi", +def VMLAL_LANEQ : SOpInst<"vmlal_laneq", "(>Q)(>Q).QI", "siUsUi", OP_MLAL_LN>; +def VMLAL_HIGH_LANE : SOpInst<"vmlal_high_lane", "(>Q)(>Q)Q.I", "siUsUi", OP_MLALHi_LN>; -def 
VMLAL_HIGH_LANEQ : SOpInst<"vmlal_high_laneq", "wwkki", "siUsUi", +def VMLAL_HIGH_LANEQ : SOpInst<"vmlal_high_laneq", "(>Q)(>Q)QQI", "siUsUi", OP_MLALHi_LN>; -def VMLSL_LANEQ : SOpInst<"vmlsl_laneq", "wwdki", "siUsUi", OP_MLSL_LN>; -def VMLSL_HIGH_LANE : SOpInst<"vmlsl_high_lane", "wwkdi", "siUsUi", +def VMLSL_LANEQ : SOpInst<"vmlsl_laneq", "(>Q)(>Q).QI", "siUsUi", OP_MLSL_LN>; +def VMLSL_HIGH_LANE : SOpInst<"vmlsl_high_lane", "(>Q)(>Q)Q.I", "siUsUi", OP_MLSLHi_LN>; -def VMLSL_HIGH_LANEQ : SOpInst<"vmlsl_high_laneq", "wwkki", "siUsUi", +def VMLSL_HIGH_LANEQ : SOpInst<"vmlsl_high_laneq", "(>Q)(>Q)QQI", "siUsUi", OP_MLSLHi_LN>; -def VQDMLAL_LANEQ : SOpInst<"vqdmlal_laneq", "wwdki", "si", OP_QDMLAL_LN>; -def VQDMLAL_HIGH_LANE : SOpInst<"vqdmlal_high_lane", "wwkdi", "si", +def VQDMLAL_LANEQ : SOpInst<"vqdmlal_laneq", "(>Q)(>Q).QI", "si", OP_QDMLAL_LN>; +def VQDMLAL_HIGH_LANE : SOpInst<"vqdmlal_high_lane", "(>Q)(>Q)Q.I", "si", OP_QDMLALHi_LN>; -def VQDMLAL_HIGH_LANEQ : SOpInst<"vqdmlal_high_laneq", "wwkki", "si", +def VQDMLAL_HIGH_LANEQ : SOpInst<"vqdmlal_high_laneq", "(>Q)(>Q)QQI", "si", OP_QDMLALHi_LN>; -def VQDMLSL_LANEQ : SOpInst<"vqdmlsl_laneq", "wwdki", "si", OP_QDMLSL_LN>; -def VQDMLSL_HIGH_LANE : SOpInst<"vqdmlsl_high_lane", "wwkdi", "si", +def VQDMLSL_LANEQ : SOpInst<"vqdmlsl_laneq", "(>Q)(>Q).QI", "si", OP_QDMLSL_LN>; +def VQDMLSL_HIGH_LANE : SOpInst<"vqdmlsl_high_lane", "(>Q)(>Q)Q.I", "si", OP_QDMLSLHi_LN>; -def VQDMLSL_HIGH_LANEQ : SOpInst<"vqdmlsl_high_laneq", "wwkki", "si", +def VQDMLSL_HIGH_LANEQ : SOpInst<"vqdmlsl_high_laneq", "(>Q)(>Q)QQI", "si", OP_QDMLSLHi_LN>; // Newly add double parameter for vmul_lane in aarch64 // Note: d type is handled by SCALAR_VMUL_LANE -def VMUL_LANE_A64 : IOpInst<"vmul_lane", "ddgi", "Qd", OP_MUL_LN>; +def VMUL_LANE_A64 : IOpInst<"vmul_lane", "..qI", "Qd", OP_MUL_LN>; // Note: d type is handled by SCALAR_VMUL_LANEQ -def VMUL_LANEQ : IOpInst<"vmul_laneq", "ddji", +def VMUL_LANEQ : IOpInst<"vmul_laneq", "..QI", 
"sifUsUiQsQiQUsQUiQfQd", OP_MUL_LN>; -def VMULL_LANEQ : SOpInst<"vmull_laneq", "wdki", "siUsUi", OP_MULL_LN>; -def VMULL_HIGH_LANE : SOpInst<"vmull_high_lane", "wkdi", "siUsUi", +def VMULL_LANEQ : SOpInst<"vmull_laneq", "(>Q).QI", "siUsUi", OP_MULL_LN>; +def VMULL_HIGH_LANE : SOpInst<"vmull_high_lane", "(>Q)Q.I", "siUsUi", OP_MULLHi_LN>; -def VMULL_HIGH_LANEQ : SOpInst<"vmull_high_laneq", "wkki", "siUsUi", +def VMULL_HIGH_LANEQ : SOpInst<"vmull_high_laneq", "(>Q)QQI", "siUsUi", OP_MULLHi_LN>; -def VQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "wdki", "si", OP_QDMULL_LN>; -def VQDMULL_HIGH_LANE : SOpInst<"vqdmull_high_lane", "wkdi", "si", +def VQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "(>Q).QI", "si", OP_QDMULL_LN>; +def VQDMULL_HIGH_LANE : SOpInst<"vqdmull_high_lane", "(>Q)Q.I", "si", OP_QDMULLHi_LN>; -def VQDMULL_HIGH_LANEQ : SOpInst<"vqdmull_high_laneq", "wkki", "si", +def VQDMULL_HIGH_LANEQ : SOpInst<"vqdmull_high_laneq", "(>Q)QQI", "si", OP_QDMULLHi_LN>; -def VQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "ddji", "siQsQi", OP_QDMULH_LN>; -def VQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ddji", "siQsQi", OP_QRDMULH_LN>; +def VQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "..QI", "siQsQi", OP_QDMULH_LN>; +def VQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "..QI", "siQsQi", OP_QRDMULH_LN>; let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in { -def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "dddji", "siQsQi", OP_QRDMLAH_LN>; -def VQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "dddji", "siQsQi", OP_QRDMLSH_LN>; +def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "...QI", "siQsQi", OP_QRDMLAH_LN>; +def VQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "...QI", "siQsQi", OP_QRDMLSH_LN>; } // Note: d type implemented by SCALAR_VMULX_LANE -def VMULX_LANE : IOpInst<"vmulx_lane", "ddgi", "fQfQd", OP_MULX_LN>; +def VMULX_LANE : IOpInst<"vmulx_lane", "..qI", "fQfQd", OP_MULX_LN>; // Note: d type is implemented by SCALAR_VMULX_LANEQ -def VMULX_LANEQ : IOpInst<"vmulx_laneq", "ddji", "fQfQd", 
OP_MULX_LN>; +def VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "fQfQd", OP_MULX_LN>; //////////////////////////////////////////////////////////////////////////////// // Across vectors class -def VADDLV : SInst<"vaddlv", "rd", "csiUcUsUiQcQsQiQUcQUsQUi">; -def VMAXV : SInst<"vmaxv", "sd", "csifUcUsUiQcQsQiQUcQUsQUiQfQd">; -def VMINV : SInst<"vminv", "sd", "csifUcUsUiQcQsQiQUcQUsQUiQfQd">; -def VADDV : SInst<"vaddv", "sd", "csifUcUsUiQcQsQiQUcQUsQUiQfQdQlQUl">; -def FMAXNMV : SInst<"vmaxnmv", "sd", "fQfQd">; -def FMINNMV : SInst<"vminnmv", "sd", "fQfQd">; +def VADDLV : SInst<"vaddlv", "(1>).", "csiUcUsUiQcQsQiQUcQUsQUi">; +def VMAXV : SInst<"vmaxv", "1.", "csifUcUsUiQcQsQiQUcQUsQUiQfQd">; +def VMINV : SInst<"vminv", "1.", "csifUcUsUiQcQsQiQUcQUsQUiQfQd">; +def VADDV : SInst<"vaddv", "1.", "csifUcUsUiQcQsQiQUcQUsQUiQfQdQlQUl">; +def FMAXNMV : SInst<"vmaxnmv", "1.", "fQfQd">; +def FMINNMV : SInst<"vminnmv", "1.", "fQfQd">; //////////////////////////////////////////////////////////////////////////////// // Newly added Vector Extract for f64 -def VEXT_A64 : WInst<"vext", "dddi", "dQdPlQPl">; +def VEXT_A64 : WInst<"vext", "...I", "dQdPlQPl">; //////////////////////////////////////////////////////////////////////////////// // Crypto let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_CRYPTO)" in { -def AESE : SInst<"vaese", "ddd", "QUc">; -def AESD : SInst<"vaesd", "ddd", "QUc">; -def AESMC : SInst<"vaesmc", "dd", "QUc">; -def AESIMC : SInst<"vaesimc", "dd", "QUc">; - -def SHA1H : SInst<"vsha1h", "ss", "Ui">; -def SHA1SU1 : SInst<"vsha1su1", "ddd", "QUi">; -def SHA256SU0 : SInst<"vsha256su0", "ddd", "QUi">; - -def SHA1C : SInst<"vsha1c", "ddsd", "QUi">; -def SHA1P : SInst<"vsha1p", "ddsd", "QUi">; -def SHA1M : SInst<"vsha1m", "ddsd", "QUi">; -def SHA1SU0 : SInst<"vsha1su0", "dddd", "QUi">; -def SHA256H : SInst<"vsha256h", "dddd", "QUi">; -def SHA256H2 : SInst<"vsha256h2", "dddd", "QUi">; -def SHA256SU1 : SInst<"vsha256su1", "dddd", "QUi">; +def AESE : SInst<"vaese", 
"...", "QUc">; +def AESD : SInst<"vaesd", "...", "QUc">; +def AESMC : SInst<"vaesmc", "..", "QUc">; +def AESIMC : SInst<"vaesimc", "..", "QUc">; + +def SHA1H : SInst<"vsha1h", "11", "Ui">; +def SHA1SU1 : SInst<"vsha1su1", "...", "QUi">; +def SHA256SU0 : SInst<"vsha256su0", "...", "QUi">; + +def SHA1C : SInst<"vsha1c", "..1.", "QUi">; +def SHA1P : SInst<"vsha1p", "..1.", "QUi">; +def SHA1M : SInst<"vsha1m", "..1.", "QUi">; +def SHA1SU0 : SInst<"vsha1su0", "....", "QUi">; +def SHA256H : SInst<"vsha256h", "....", "QUi">; +def SHA256H2 : SInst<"vsha256h2", "....", "QUi">; +def SHA256SU1 : SInst<"vsha256su1", "....", "QUi">; } //////////////////////////////////////////////////////////////////////////////// // Float -> Int conversions with explicit rounding mode let ArchGuard = "__ARM_ARCH >= 8" in { -def FCVTNS_S32 : SInst<"vcvtn_s32", "xd", "fQf">; -def FCVTNU_S32 : SInst<"vcvtn_u32", "ud", "fQf">; -def FCVTPS_S32 : SInst<"vcvtp_s32", "xd", "fQf">; -def FCVTPU_S32 : SInst<"vcvtp_u32", "ud", "fQf">; -def FCVTMS_S32 : SInst<"vcvtm_s32", "xd", "fQf">; -def FCVTMU_S32 : SInst<"vcvtm_u32", "ud", "fQf">; -def FCVTAS_S32 : SInst<"vcvta_s32", "xd", "fQf">; -def FCVTAU_S32 : SInst<"vcvta_u32", "ud", "fQf">; +def FCVTNS_S32 : SInst<"vcvtn_s32", "S.", "fQf">; +def FCVTNU_S32 : SInst<"vcvtn_u32", "U.", "fQf">; +def FCVTPS_S32 : SInst<"vcvtp_s32", "S.", "fQf">; +def FCVTPU_S32 : SInst<"vcvtp_u32", "U.", "fQf">; +def FCVTMS_S32 : SInst<"vcvtm_s32", "S.", "fQf">; +def FCVTMU_S32 : SInst<"vcvtm_u32", "U.", "fQf">; +def FCVTAS_S32 : SInst<"vcvta_s32", "S.", "fQf">; +def FCVTAU_S32 : SInst<"vcvta_u32", "U.", "fQf">; } let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__)" in { -def FCVTNS_S64 : SInst<"vcvtn_s64", "xd", "dQd">; -def FCVTNU_S64 : SInst<"vcvtn_u64", "ud", "dQd">; -def FCVTPS_S64 : SInst<"vcvtp_s64", "xd", "dQd">; -def FCVTPU_S64 : SInst<"vcvtp_u64", "ud", "dQd">; -def FCVTMS_S64 : SInst<"vcvtm_s64", "xd", "dQd">; -def FCVTMU_S64 : SInst<"vcvtm_u64", "ud", "dQd">; -def 
FCVTAS_S64 : SInst<"vcvta_s64", "xd", "dQd">; -def FCVTAU_S64 : SInst<"vcvta_u64", "ud", "dQd">; +def FCVTNS_S64 : SInst<"vcvtn_s64", "S.", "dQd">; +def FCVTNU_S64 : SInst<"vcvtn_u64", "U.", "dQd">; +def FCVTPS_S64 : SInst<"vcvtp_s64", "S.", "dQd">; +def FCVTPU_S64 : SInst<"vcvtp_u64", "U.", "dQd">; +def FCVTMS_S64 : SInst<"vcvtm_s64", "S.", "dQd">; +def FCVTMU_S64 : SInst<"vcvtm_u64", "U.", "dQd">; +def FCVTAS_S64 : SInst<"vcvta_s64", "S.", "dQd">; +def FCVTAU_S64 : SInst<"vcvta_u64", "U.", "dQd">; } //////////////////////////////////////////////////////////////////////////////// // Round to Integral let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_DIRECTED_ROUNDING)" in { -def FRINTN_S32 : SInst<"vrndn", "dd", "fQf">; -def FRINTA_S32 : SInst<"vrnda", "dd", "fQf">; -def FRINTP_S32 : SInst<"vrndp", "dd", "fQf">; -def FRINTM_S32 : SInst<"vrndm", "dd", "fQf">; -def FRINTX_S32 : SInst<"vrndx", "dd", "fQf">; -def FRINTZ_S32 : SInst<"vrnd", "dd", "fQf">; -def FRINTI_S32 : SInst<"vrndi", "dd", "fQf">; +def FRINTN_S32 : SInst<"vrndn", "..", "fQf">; +def FRINTA_S32 : SInst<"vrnda", "..", "fQf">; +def FRINTP_S32 : SInst<"vrndp", "..", "fQf">; +def FRINTM_S32 : SInst<"vrndm", "..", "fQf">; +def FRINTX_S32 : SInst<"vrndx", "..", "fQf">; +def FRINTZ_S32 : SInst<"vrnd", "..", "fQf">; +def FRINTI_S32 : SInst<"vrndi", "..", "fQf">; } let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__) && defined(__ARM_FEATURE_DIRECTED_ROUNDING)" in { -def FRINTN_S64 : SInst<"vrndn", "dd", "dQd">; -def FRINTA_S64 : SInst<"vrnda", "dd", "dQd">; -def FRINTP_S64 : SInst<"vrndp", "dd", "dQd">; -def FRINTM_S64 : SInst<"vrndm", "dd", "dQd">; -def FRINTX_S64 : SInst<"vrndx", "dd", "dQd">; -def FRINTZ_S64 : SInst<"vrnd", "dd", "dQd">; -def FRINTI_S64 : SInst<"vrndi", "dd", "dQd">; +def FRINTN_S64 : SInst<"vrndn", "..", "dQd">; +def FRINTA_S64 : SInst<"vrnda", "..", "dQd">; +def FRINTP_S64 : SInst<"vrndp", "..", "dQd">; +def FRINTM_S64 : SInst<"vrndm", "..", "dQd">; +def FRINTX_S64 : 
SInst<"vrndx", "..", "dQd">; +def FRINTZ_S64 : SInst<"vrnd", "..", "dQd">; +def FRINTI_S64 : SInst<"vrndi", "..", "dQd">; } //////////////////////////////////////////////////////////////////////////////// // MaxNum/MinNum Floating Point let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_NUMERIC_MAXMIN)" in { -def FMAXNM_S32 : SInst<"vmaxnm", "ddd", "fQf">; -def FMINNM_S32 : SInst<"vminnm", "ddd", "fQf">; +def FMAXNM_S32 : SInst<"vmaxnm", "...", "fQf">; +def FMINNM_S32 : SInst<"vminnm", "...", "fQf">; } let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__) && defined(__ARM_FEATURE_NUMERIC_MAXMIN)" in { -def FMAXNM_S64 : SInst<"vmaxnm", "ddd", "dQd">; -def FMINNM_S64 : SInst<"vminnm", "ddd", "dQd">; +def FMAXNM_S64 : SInst<"vmaxnm", "...", "dQd">; +def FMINNM_S64 : SInst<"vminnm", "...", "dQd">; } //////////////////////////////////////////////////////////////////////////////// // Permutation -def VTRN1 : SOpInst<"vtrn1", "ddd", +def VTRN1 : SOpInst<"vtrn1", "...", "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_TRN1>; -def VZIP1 : SOpInst<"vzip1", "ddd", +def VZIP1 : SOpInst<"vzip1", "...", "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_ZIP1>; -def VUZP1 : SOpInst<"vuzp1", "ddd", +def VUZP1 : SOpInst<"vuzp1", "...", "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_UZP1>; -def VTRN2 : SOpInst<"vtrn2", "ddd", +def VTRN2 : SOpInst<"vtrn2", "...", "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_TRN2>; -def VZIP2 : SOpInst<"vzip2", "ddd", +def VZIP2 : SOpInst<"vzip2", "...", "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_ZIP2>; -def VUZP2 : SOpInst<"vuzp2", "ddd", +def VUZP2 : SOpInst<"vuzp2", "...", "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_UZP2>; //////////////////////////////////////////////////////////////////////////////// // Table lookup let InstName = "vtbl" in { -def VQTBL1_A64 : WInst<"vqtbl1", "dju", "UccPcQUcQcQPc">; -def VQTBL2_A64 : WInst<"vqtbl2", "dBu", "UccPcQUcQcQPc">; -def VQTBL3_A64 : WInst<"vqtbl3", 
"dCu", "UccPcQUcQcQPc">; -def VQTBL4_A64 : WInst<"vqtbl4", "dDu", "UccPcQUcQcQPc">; +def VQTBL1_A64 : WInst<"vqtbl1", ".QU", "UccPcQUcQcQPc">; +def VQTBL2_A64 : WInst<"vqtbl2", ".(2Q)U", "UccPcQUcQcQPc">; +def VQTBL3_A64 : WInst<"vqtbl3", ".(3Q)U", "UccPcQUcQcQPc">; +def VQTBL4_A64 : WInst<"vqtbl4", ".(4Q)U", "UccPcQUcQcQPc">; } let InstName = "vtbx" in { -def VQTBX1_A64 : WInst<"vqtbx1", "ddju", "UccPcQUcQcQPc">; -def VQTBX2_A64 : WInst<"vqtbx2", "ddBu", "UccPcQUcQcQPc">; -def VQTBX3_A64 : WInst<"vqtbx3", "ddCu", "UccPcQUcQcQPc">; -def VQTBX4_A64 : WInst<"vqtbx4", "ddDu", "UccPcQUcQcQPc">; +def VQTBX1_A64 : WInst<"vqtbx1", "..QU", "UccPcQUcQcQPc">; +def VQTBX2_A64 : WInst<"vqtbx2", "..(2Q)U", "UccPcQUcQcQPc">; +def VQTBX3_A64 : WInst<"vqtbx3", "..(3Q)U", "UccPcQUcQcQPc">; +def VQTBX4_A64 : WInst<"vqtbx4", "..(4Q)U", "UccPcQUcQcQPc">; } //////////////////////////////////////////////////////////////////////////////// @@ -1095,7 +1096,7 @@ def VQTBX4_A64 : WInst<"vqtbx4", "ddDu", "UccPcQUcQcQPc">; // itself during generation so, unlike all other intrinsics, this one should // include *all* types, not just additional ones. 
def VVREINTERPRET - : NoTestOpInst<"vreinterpret", "dd", + : NoTestOpInst<"vreinterpret", "..", "csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk", OP_REINT> { let CartesianProductOfTypes = 1; let BigEndianSafe = 1; @@ -1107,332 +1108,332 @@ def VVREINTERPRET // Scalar Arithmetic // Scalar Addition -def SCALAR_ADD : SInst<"vadd", "sss", "SlSUl">; +def SCALAR_ADD : SInst<"vadd", "111", "SlSUl">; // Scalar Saturating Add -def SCALAR_QADD : SInst<"vqadd", "sss", "ScSsSiSlSUcSUsSUiSUl">; +def SCALAR_QADD : SInst<"vqadd", "111", "ScSsSiSlSUcSUsSUiSUl">; // Scalar Subtraction -def SCALAR_SUB : SInst<"vsub", "sss", "SlSUl">; +def SCALAR_SUB : SInst<"vsub", "111", "SlSUl">; // Scalar Saturating Sub -def SCALAR_QSUB : SInst<"vqsub", "sss", "ScSsSiSlSUcSUsSUiSUl">; +def SCALAR_QSUB : SInst<"vqsub", "111", "ScSsSiSlSUcSUsSUiSUl">; let InstName = "vmov" in { -def VGET_HIGH_A64 : NoTestOpInst<"vget_high", "dk", "dPl", OP_HI>; -def VGET_LOW_A64 : NoTestOpInst<"vget_low", "dk", "dPl", OP_LO>; +def VGET_HIGH_A64 : NoTestOpInst<"vget_high", ".Q", "dPl", OP_HI>; +def VGET_LOW_A64 : NoTestOpInst<"vget_low", ".Q", "dPl", OP_LO>; } //////////////////////////////////////////////////////////////////////////////// // Scalar Shift // Scalar Shift Left -def SCALAR_SHL: SInst<"vshl", "sss", "SlSUl">; +def SCALAR_SHL: SInst<"vshl", "111", "SlSUl">; // Scalar Saturating Shift Left -def SCALAR_QSHL: SInst<"vqshl", "sss", "ScSsSiSlSUcSUsSUiSUl">; +def SCALAR_QSHL: SInst<"vqshl", "111", "ScSsSiSlSUcSUsSUiSUl">; // Scalar Saturating Rounding Shift Left -def SCALAR_QRSHL: SInst<"vqrshl", "sss", "ScSsSiSlSUcSUsSUiSUl">; +def SCALAR_QRSHL: SInst<"vqrshl", "111", "ScSsSiSlSUcSUsSUiSUl">; // Scalar Shift Rounding Left -def SCALAR_RSHL: SInst<"vrshl", "sss", "SlSUl">; +def SCALAR_RSHL: SInst<"vrshl", "111", "SlSUl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Shift (Immediate) let isScalarShift = 1 in { // Signed/Unsigned Shift Right (Immediate) 
-def SCALAR_SSHR_N: SInst<"vshr_n", "ssi", "SlSUl">; +def SCALAR_SSHR_N: SInst<"vshr_n", "11I", "SlSUl">; // Signed/Unsigned Rounding Shift Right (Immediate) -def SCALAR_SRSHR_N: SInst<"vrshr_n", "ssi", "SlSUl">; +def SCALAR_SRSHR_N: SInst<"vrshr_n", "11I", "SlSUl">; // Signed/Unsigned Shift Right and Accumulate (Immediate) -def SCALAR_SSRA_N: SInst<"vsra_n", "sssi", "SlSUl">; +def SCALAR_SSRA_N: SInst<"vsra_n", "111I", "SlSUl">; // Signed/Unsigned Rounding Shift Right and Accumulate (Immediate) -def SCALAR_SRSRA_N: SInst<"vrsra_n", "sssi", "SlSUl">; +def SCALAR_SRSRA_N: SInst<"vrsra_n", "111I", "SlSUl">; // Shift Left (Immediate) -def SCALAR_SHL_N: SInst<"vshl_n", "ssi", "SlSUl">; +def SCALAR_SHL_N: SInst<"vshl_n", "11I", "SlSUl">; // Signed/Unsigned Saturating Shift Left (Immediate) -def SCALAR_SQSHL_N: SInst<"vqshl_n", "ssi", "ScSsSiSlSUcSUsSUiSUl">; +def SCALAR_SQSHL_N: SInst<"vqshl_n", "11I", "ScSsSiSlSUcSUsSUiSUl">; // Signed Saturating Shift Left Unsigned (Immediate) -def SCALAR_SQSHLU_N: SInst<"vqshlu_n", "ssi", "ScSsSiSl">; +def SCALAR_SQSHLU_N: SInst<"vqshlu_n", "11I", "ScSsSiSl">; // Shift Right And Insert (Immediate) -def SCALAR_SRI_N: SInst<"vsri_n", "sssi", "SlSUl">; +def SCALAR_SRI_N: SInst<"vsri_n", "111I", "SlSUl">; // Shift Left And Insert (Immediate) -def SCALAR_SLI_N: SInst<"vsli_n", "sssi", "SlSUl">; +def SCALAR_SLI_N: SInst<"vsli_n", "111I", "SlSUl">; let isScalarNarrowShift = 1 in { // Signed/Unsigned Saturating Shift Right Narrow (Immediate) - def SCALAR_SQSHRN_N: SInst<"vqshrn_n", "zsi", "SsSiSlSUsSUiSUl">; + def SCALAR_SQSHRN_N: SInst<"vqshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl">; // Signed/Unsigned Saturating Rounded Shift Right Narrow (Immediate) - def SCALAR_SQRSHRN_N: SInst<"vqrshrn_n", "zsi", "SsSiSlSUsSUiSUl">; + def SCALAR_SQRSHRN_N: SInst<"vqrshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl">; // Signed Saturating Shift Right Unsigned Narrow (Immediate) - def SCALAR_SQSHRUN_N: SInst<"vqshrun_n", "zsi", "SsSiSl">; + def SCALAR_SQSHRUN_N: 
SInst<"vqshrun_n", "(1<)1I", "SsSiSl">; // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate) - def SCALAR_SQRSHRUN_N: SInst<"vqrshrun_n", "zsi", "SsSiSl">; + def SCALAR_SQRSHRUN_N: SInst<"vqrshrun_n", "(1<)1I", "SsSiSl">; } //////////////////////////////////////////////////////////////////////////////// // Scalar Signed/Unsigned Fixed-point Convert To Floating-Point (Immediate) -def SCALAR_SCVTF_N_F32: SInst<"vcvt_n_f32", "ysi", "SiSUi">; -def SCALAR_SCVTF_N_F64: SInst<"vcvt_n_f64", "osi", "SlSUl">; +def SCALAR_SCVTF_N_F32: SInst<"vcvt_n_f32", "(1F)(1!)I", "SiSUi">; +def SCALAR_SCVTF_N_F64: SInst<"vcvt_n_f64", "(1F)(1!)I", "SlSUl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Convert To Signed/Unsigned Fixed-point (Immediate) -def SCALAR_FCVTZS_N_S32 : SInst<"vcvt_n_s32", "$si", "Sf">; -def SCALAR_FCVTZU_N_U32 : SInst<"vcvt_n_u32", "bsi", "Sf">; -def SCALAR_FCVTZS_N_S64 : SInst<"vcvt_n_s64", "$si", "Sd">; -def SCALAR_FCVTZU_N_U64 : SInst<"vcvt_n_u64", "bsi", "Sd">; +def SCALAR_FCVTZS_N_S32 : SInst<"vcvt_n_s32", "(1S)1I", "Sf">; +def SCALAR_FCVTZU_N_U32 : SInst<"vcvt_n_u32", "(1U)1I", "Sf">; +def SCALAR_FCVTZS_N_S64 : SInst<"vcvt_n_s64", "(1S)1I", "Sd">; +def SCALAR_FCVTZU_N_U64 : SInst<"vcvt_n_u64", "(1U)1I", "Sd">; } //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Round to Integral let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_DIRECTED_ROUNDING)" in { -def SCALAR_FRINTN_S32 : SInst<"vrndn", "ss", "Sf">; +def SCALAR_FRINTN_S32 : SInst<"vrndn", "11", "Sf">; } //////////////////////////////////////////////////////////////////////////////// // Scalar Reduce Pairwise Addition (Scalar and Floating Point) -def SCALAR_ADDP : SInst<"vpadd", "sd", "SfSHlSHdSHUl">; +def SCALAR_ADDP : SInst<"vpadd", "1.", "SfSHlSHdSHUl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Reduce Floating 
Point Pairwise Max/Min -def SCALAR_FMAXP : SInst<"vpmax", "sd", "SfSQd">; +def SCALAR_FMAXP : SInst<"vpmax", "1.", "SfSQd">; -def SCALAR_FMINP : SInst<"vpmin", "sd", "SfSQd">; +def SCALAR_FMINP : SInst<"vpmin", "1.", "SfSQd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Reduce Floating Point Pairwise maxNum/minNum -def SCALAR_FMAXNMP : SInst<"vpmaxnm", "sd", "SfSQd">; -def SCALAR_FMINNMP : SInst<"vpminnm", "sd", "SfSQd">; +def SCALAR_FMAXNMP : SInst<"vpmaxnm", "1.", "SfSQd">; +def SCALAR_FMINNMP : SInst<"vpminnm", "1.", "SfSQd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Integer Saturating Doubling Multiply Half High -def SCALAR_SQDMULH : SInst<"vqdmulh", "sss", "SsSi">; +def SCALAR_SQDMULH : SInst<"vqdmulh", "111", "SsSi">; //////////////////////////////////////////////////////////////////////////////// // Scalar Integer Saturating Rounding Doubling Multiply Half High -def SCALAR_SQRDMULH : SInst<"vqrdmulh", "sss", "SsSi">; +def SCALAR_SQRDMULH : SInst<"vqrdmulh", "111", "SsSi">; let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in { //////////////////////////////////////////////////////////////////////////////// // Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half -def SCALAR_SQRDMLAH : SOpInst<"vqrdmlah", "ssss", "SsSi", OP_QRDMLAH>; +def SCALAR_SQRDMLAH : SOpInst<"vqrdmlah", "1111", "SsSi", OP_QRDMLAH>; //////////////////////////////////////////////////////////////////////////////// // Signed Saturating Rounding Doubling Multiply Subtract Returning High Half -def SCALAR_SQRDMLSH : SOpInst<"vqrdmlsh", "ssss", "SsSi", OP_QRDMLSH>; +def SCALAR_SQRDMLSH : SOpInst<"vqrdmlsh", "1111", "SsSi", OP_QRDMLSH>; } //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Multiply Extended -def SCALAR_FMULX : IInst<"vmulx", "sss", "SfSd">; +def SCALAR_FMULX : IInst<"vmulx", "111", 
"SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Reciprocal Step -def SCALAR_FRECPS : IInst<"vrecps", "sss", "SfSd">; +def SCALAR_FRECPS : IInst<"vrecps", "111", "SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Reciprocal Square Root Step -def SCALAR_FRSQRTS : IInst<"vrsqrts", "sss", "SfSd">; +def SCALAR_FRSQRTS : IInst<"vrsqrts", "111", "SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Signed Integer Convert To Floating-point -def SCALAR_SCVTFS : SInst<"vcvt_f32", "ys", "Si">; -def SCALAR_SCVTFD : SInst<"vcvt_f64", "os", "Sl">; +def SCALAR_SCVTFS : SInst<"vcvt_f32", "(1F)(1!)", "Si">; +def SCALAR_SCVTFD : SInst<"vcvt_f64", "(1F)(1!)", "Sl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Unsigned Integer Convert To Floating-point -def SCALAR_UCVTFS : SInst<"vcvt_f32", "ys", "SUi">; -def SCALAR_UCVTFD : SInst<"vcvt_f64", "os", "SUl">; +def SCALAR_UCVTFS : SInst<"vcvt_f32", "(1F)(1!)", "SUi">; +def SCALAR_UCVTFD : SInst<"vcvt_f64", "(1F)(1!)", "SUl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Converts -def SCALAR_FCVTXN : IInst<"vcvtx_f32", "ys", "Sd">; -def SCALAR_FCVTNSS : SInst<"vcvtn_s32", "$s", "Sf">; -def SCALAR_FCVTNUS : SInst<"vcvtn_u32", "bs", "Sf">; -def SCALAR_FCVTNSD : SInst<"vcvtn_s64", "$s", "Sd">; -def SCALAR_FCVTNUD : SInst<"vcvtn_u64", "bs", "Sd">; -def SCALAR_FCVTMSS : SInst<"vcvtm_s32", "$s", "Sf">; -def SCALAR_FCVTMUS : SInst<"vcvtm_u32", "bs", "Sf">; -def SCALAR_FCVTMSD : SInst<"vcvtm_s64", "$s", "Sd">; -def SCALAR_FCVTMUD : SInst<"vcvtm_u64", "bs", "Sd">; -def SCALAR_FCVTASS : SInst<"vcvta_s32", "$s", "Sf">; -def SCALAR_FCVTAUS : SInst<"vcvta_u32", "bs", "Sf">; -def SCALAR_FCVTASD : SInst<"vcvta_s64", "$s", "Sd">; -def SCALAR_FCVTAUD : SInst<"vcvta_u64", "bs", 
"Sd">; -def SCALAR_FCVTPSS : SInst<"vcvtp_s32", "$s", "Sf">; -def SCALAR_FCVTPUS : SInst<"vcvtp_u32", "bs", "Sf">; -def SCALAR_FCVTPSD : SInst<"vcvtp_s64", "$s", "Sd">; -def SCALAR_FCVTPUD : SInst<"vcvtp_u64", "bs", "Sd">; -def SCALAR_FCVTZSS : SInst<"vcvt_s32", "$s", "Sf">; -def SCALAR_FCVTZUS : SInst<"vcvt_u32", "bs", "Sf">; -def SCALAR_FCVTZSD : SInst<"vcvt_s64", "$s", "Sd">; -def SCALAR_FCVTZUD : SInst<"vcvt_u64", "bs", "Sd">; +def SCALAR_FCVTXN : IInst<"vcvtx_f32", "(1F<)(1!)", "Sd">; +def SCALAR_FCVTNSS : SInst<"vcvtn_s32", "(1S)1", "Sf">; +def SCALAR_FCVTNUS : SInst<"vcvtn_u32", "(1U)1", "Sf">; +def SCALAR_FCVTNSD : SInst<"vcvtn_s64", "(1S)1", "Sd">; +def SCALAR_FCVTNUD : SInst<"vcvtn_u64", "(1U)1", "Sd">; +def SCALAR_FCVTMSS : SInst<"vcvtm_s32", "(1S)1", "Sf">; +def SCALAR_FCVTMUS : SInst<"vcvtm_u32", "(1U)1", "Sf">; +def SCALAR_FCVTMSD : SInst<"vcvtm_s64", "(1S)1", "Sd">; +def SCALAR_FCVTMUD : SInst<"vcvtm_u64", "(1U)1", "Sd">; +def SCALAR_FCVTASS : SInst<"vcvta_s32", "(1S)1", "Sf">; +def SCALAR_FCVTAUS : SInst<"vcvta_u32", "(1U)1", "Sf">; +def SCALAR_FCVTASD : SInst<"vcvta_s64", "(1S)1", "Sd">; +def SCALAR_FCVTAUD : SInst<"vcvta_u64", "(1U)1", "Sd">; +def SCALAR_FCVTPSS : SInst<"vcvtp_s32", "(1S)1", "Sf">; +def SCALAR_FCVTPUS : SInst<"vcvtp_u32", "(1U)1", "Sf">; +def SCALAR_FCVTPSD : SInst<"vcvtp_s64", "(1S)1", "Sd">; +def SCALAR_FCVTPUD : SInst<"vcvtp_u64", "(1U)1", "Sd">; +def SCALAR_FCVTZSS : SInst<"vcvt_s32", "(1S)1", "Sf">; +def SCALAR_FCVTZUS : SInst<"vcvt_u32", "(1U)1", "Sf">; +def SCALAR_FCVTZSD : SInst<"vcvt_s64", "(1S)1", "Sd">; +def SCALAR_FCVTZUD : SInst<"vcvt_u64", "(1U)1", "Sd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Reciprocal Estimate -def SCALAR_FRECPE : IInst<"vrecpe", "ss", "SfSd">; +def SCALAR_FRECPE : IInst<"vrecpe", "11", "SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Reciprocal Exponent -def 
SCALAR_FRECPX : IInst<"vrecpx", "ss", "SfSd">; +def SCALAR_FRECPX : IInst<"vrecpx", "11", "SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Reciprocal Square Root Estimate -def SCALAR_FRSQRTE : IInst<"vrsqrte", "ss", "SfSd">; +def SCALAR_FRSQRTE : IInst<"vrsqrte", "11", "SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Integer Comparison -def SCALAR_CMEQ : SInst<"vceq", "sss", "SlSUl">; -def SCALAR_CMEQZ : SInst<"vceqz", "ss", "SlSUl">; -def SCALAR_CMGE : SInst<"vcge", "sss", "Sl">; -def SCALAR_CMGEZ : SInst<"vcgez", "ss", "Sl">; -def SCALAR_CMHS : SInst<"vcge", "sss", "SUl">; -def SCALAR_CMLE : SInst<"vcle", "sss", "SlSUl">; -def SCALAR_CMLEZ : SInst<"vclez", "ss", "Sl">; -def SCALAR_CMLT : SInst<"vclt", "sss", "SlSUl">; -def SCALAR_CMLTZ : SInst<"vcltz", "ss", "Sl">; -def SCALAR_CMGT : SInst<"vcgt", "sss", "Sl">; -def SCALAR_CMGTZ : SInst<"vcgtz", "ss", "Sl">; -def SCALAR_CMHI : SInst<"vcgt", "sss", "SUl">; -def SCALAR_CMTST : SInst<"vtst", "sss", "SlSUl">; +def SCALAR_CMEQ : SInst<"vceq", "111", "SlSUl">; +def SCALAR_CMEQZ : SInst<"vceqz", "11", "SlSUl">; +def SCALAR_CMGE : SInst<"vcge", "111", "Sl">; +def SCALAR_CMGEZ : SInst<"vcgez", "11", "Sl">; +def SCALAR_CMHS : SInst<"vcge", "111", "SUl">; +def SCALAR_CMLE : SInst<"vcle", "111", "SlSUl">; +def SCALAR_CMLEZ : SInst<"vclez", "11", "Sl">; +def SCALAR_CMLT : SInst<"vclt", "111", "SlSUl">; +def SCALAR_CMLTZ : SInst<"vcltz", "11", "Sl">; +def SCALAR_CMGT : SInst<"vcgt", "111", "Sl">; +def SCALAR_CMGTZ : SInst<"vcgtz", "11", "Sl">; +def SCALAR_CMHI : SInst<"vcgt", "111", "SUl">; +def SCALAR_CMTST : SInst<"vtst", "111", "SlSUl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Comparison -def SCALAR_FCMEQ : IInst<"vceq", "bss", "SfSd">; -def SCALAR_FCMEQZ : IInst<"vceqz", "bs", "SfSd">; -def SCALAR_FCMGE : IInst<"vcge", "bss", "SfSd">; -def 
SCALAR_FCMGEZ : IInst<"vcgez", "bs", "SfSd">; -def SCALAR_FCMGT : IInst<"vcgt", "bss", "SfSd">; -def SCALAR_FCMGTZ : IInst<"vcgtz", "bs", "SfSd">; -def SCALAR_FCMLE : IInst<"vcle", "bss", "SfSd">; -def SCALAR_FCMLEZ : IInst<"vclez", "bs", "SfSd">; -def SCALAR_FCMLT : IInst<"vclt", "bss", "SfSd">; -def SCALAR_FCMLTZ : IInst<"vcltz", "bs", "SfSd">; +def SCALAR_FCMEQ : IInst<"vceq", "(1U)11", "SfSd">; +def SCALAR_FCMEQZ : IInst<"vceqz", "(1U)1", "SfSd">; +def SCALAR_FCMGE : IInst<"vcge", "(1U)11", "SfSd">; +def SCALAR_FCMGEZ : IInst<"vcgez", "(1U)1", "SfSd">; +def SCALAR_FCMGT : IInst<"vcgt", "(1U)11", "SfSd">; +def SCALAR_FCMGTZ : IInst<"vcgtz", "(1U)1", "SfSd">; +def SCALAR_FCMLE : IInst<"vcle", "(1U)11", "SfSd">; +def SCALAR_FCMLEZ : IInst<"vclez", "(1U)1", "SfSd">; +def SCALAR_FCMLT : IInst<"vclt", "(1U)11", "SfSd">; +def SCALAR_FCMLTZ : IInst<"vcltz", "(1U)1", "SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Absolute Compare Mask Greater Than Or Equal -def SCALAR_FACGE : IInst<"vcage", "bss", "SfSd">; -def SCALAR_FACLE : IInst<"vcale", "bss", "SfSd">; +def SCALAR_FACGE : IInst<"vcage", "(1U)11", "SfSd">; +def SCALAR_FACLE : IInst<"vcale", "(1U)11", "SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Floating-point Absolute Compare Mask Greater Than -def SCALAR_FACGT : IInst<"vcagt", "bss", "SfSd">; -def SCALAR_FACLT : IInst<"vcalt", "bss", "SfSd">; +def SCALAR_FACGT : IInst<"vcagt", "(1U)11", "SfSd">; +def SCALAR_FACLT : IInst<"vcalt", "(1U)11", "SfSd">; //////////////////////////////////////////////////////////////////////////////// // Scalar Absolute Value -def SCALAR_ABS : SInst<"vabs", "ss", "Sl">; +def SCALAR_ABS : SInst<"vabs", "11", "Sl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Absolute Difference -def SCALAR_ABD : IInst<"vabd", "sss", "SfSd">; +def SCALAR_ABD : IInst<"vabd", "111", "SfSd">; 
//////////////////////////////////////////////////////////////////////////////// // Scalar Signed Saturating Absolute Value -def SCALAR_SQABS : SInst<"vqabs", "ss", "ScSsSiSl">; +def SCALAR_SQABS : SInst<"vqabs", "11", "ScSsSiSl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Negate -def SCALAR_NEG : SInst<"vneg", "ss", "Sl">; +def SCALAR_NEG : SInst<"vneg", "11", "Sl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Signed Saturating Negate -def SCALAR_SQNEG : SInst<"vqneg", "ss", "ScSsSiSl">; +def SCALAR_SQNEG : SInst<"vqneg", "11", "ScSsSiSl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Signed Saturating Accumulated of Unsigned Value -def SCALAR_SUQADD : SInst<"vuqadd", "ssb", "ScSsSiSl">; +def SCALAR_SUQADD : SInst<"vuqadd", "11(1U)", "ScSsSiSl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Unsigned Saturating Accumulated of Signed Value -def SCALAR_USQADD : SInst<"vsqadd", "ss$", "SUcSUsSUiSUl">; +def SCALAR_USQADD : SInst<"vsqadd", "11(1S)", "SUcSUsSUiSUl">; //////////////////////////////////////////////////////////////////////////////// // Signed Saturating Doubling Multiply-Add Long -def SCALAR_SQDMLAL : SInst<"vqdmlal", "rrss", "SsSi">; +def SCALAR_SQDMLAL : SInst<"vqdmlal", "(1>)(1>)11", "SsSi">; //////////////////////////////////////////////////////////////////////////////// // Signed Saturating Doubling Multiply-Subtract Long -def SCALAR_SQDMLSL : SInst<"vqdmlsl", "rrss", "SsSi">; +def SCALAR_SQDMLSL : SInst<"vqdmlsl", "(1>)(1>)11", "SsSi">; //////////////////////////////////////////////////////////////////////////////// // Signed Saturating Doubling Multiply Long -def SCALAR_SQDMULL : SInst<"vqdmull", "rss", "SsSi">; +def SCALAR_SQDMULL : SInst<"vqdmull", "(1>)11", "SsSi">; //////////////////////////////////////////////////////////////////////////////// // Scalar Signed 
Saturating Extract Unsigned Narrow -def SCALAR_SQXTUN : SInst<"vqmovun", "zs", "SsSiSl">; +def SCALAR_SQXTUN : SInst<"vqmovun", "(1<)1", "SsSiSl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Signed Saturating Extract Narrow -def SCALAR_SQXTN : SInst<"vqmovn", "zs", "SsSiSl">; +def SCALAR_SQXTN : SInst<"vqmovn", "(1<)1", "SsSiSl">; //////////////////////////////////////////////////////////////////////////////// // Scalar Unsigned Saturating Extract Narrow -def SCALAR_UQXTN : SInst<"vqmovn", "zs", "SUsSUiSUl">; +def SCALAR_UQXTN : SInst<"vqmovn", "(1<)1", "SUsSUiSUl">; // Scalar Floating Point multiply (scalar, by element) -def SCALAR_FMUL_LANE : IOpInst<"vmul_lane", "ssdi", "SfSd", OP_SCALAR_MUL_LN>; -def SCALAR_FMUL_LANEQ : IOpInst<"vmul_laneq", "ssji", "SfSd", OP_SCALAR_MUL_LN>; +def SCALAR_FMUL_LANE : IOpInst<"vmul_lane", "11.I", "SfSd", OP_SCALAR_MUL_LN>; +def SCALAR_FMUL_LANEQ : IOpInst<"vmul_laneq", "11QI", "SfSd", OP_SCALAR_MUL_LN>; // Scalar Floating Point multiply extended (scalar, by element) -def SCALAR_FMULX_LANE : IOpInst<"vmulx_lane", "ssdi", "SfSd", OP_SCALAR_MULX_LN>; -def SCALAR_FMULX_LANEQ : IOpInst<"vmulx_laneq", "ssji", "SfSd", OP_SCALAR_MULX_LN>; +def SCALAR_FMULX_LANE : IOpInst<"vmulx_lane", "11.I", "SfSd", OP_SCALAR_MULX_LN>; +def SCALAR_FMULX_LANEQ : IOpInst<"vmulx_laneq", "11QI", "SfSd", OP_SCALAR_MULX_LN>; -def SCALAR_VMUL_N : IInst<"vmul_n", "dds", "d">; +def SCALAR_VMUL_N : IInst<"vmul_n", "..1", "d">; // VMUL_LANE_A64 d type implemented using scalar mul lane -def SCALAR_VMUL_LANE : IInst<"vmul_lane", "ddgi", "d">; +def SCALAR_VMUL_LANE : IInst<"vmul_lane", "..qI", "d">; // VMUL_LANEQ d type implemented using scalar mul lane -def SCALAR_VMUL_LANEQ : IInst<"vmul_laneq", "ddji", "d"> { +def SCALAR_VMUL_LANEQ : IInst<"vmul_laneq", "..QI", "d"> { let isLaneQ = 1; } // VMULX_LANE d type implemented using scalar vmulx_lane -def SCALAR_VMULX_LANE : IOpInst<"vmulx_lane", "ddgi", "d", 
OP_SCALAR_VMULX_LN>; +def SCALAR_VMULX_LANE : IOpInst<"vmulx_lane", "..qI", "d", OP_SCALAR_VMULX_LN>; // VMULX_LANEQ d type implemented using scalar vmulx_laneq -def SCALAR_VMULX_LANEQ : IOpInst<"vmulx_laneq", "ddji", "d", OP_SCALAR_VMULX_LNQ>; +def SCALAR_VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "d", OP_SCALAR_VMULX_LNQ>; // Scalar Floating Point fused multiply-add (scalar, by element) -def SCALAR_FMLA_LANE : IInst<"vfma_lane", "sssdi", "SfSd">; -def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "sssji", "SfSd">; +def SCALAR_FMLA_LANE : IInst<"vfma_lane", "111.I", "SfSd">; +def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "111QI", "SfSd">; // Scalar Floating Point fused multiply-subtract (scalar, by element) -def SCALAR_FMLS_LANE : IOpInst<"vfms_lane", "sssdi", "SfSd", OP_FMS_LN>; -def SCALAR_FMLS_LANEQ : IOpInst<"vfms_laneq", "sssji", "SfSd", OP_FMS_LNQ>; +def SCALAR_FMLS_LANE : IOpInst<"vfms_lane", "111.I", "SfSd", OP_FMS_LN>; +def SCALAR_FMLS_LANEQ : IOpInst<"vfms_laneq", "111QI", "SfSd", OP_FMS_LNQ>; // Signed Saturating Doubling Multiply Long (scalar by element) -def SCALAR_SQDMULL_LANE : SOpInst<"vqdmull_lane", "rsdi", "SsSi", OP_SCALAR_QDMULL_LN>; -def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "rsji", "SsSi", OP_SCALAR_QDMULL_LN>; +def SCALAR_SQDMULL_LANE : SOpInst<"vqdmull_lane", "(1>)1.I", "SsSi", OP_SCALAR_QDMULL_LN>; +def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "(1>)1QI", "SsSi", OP_SCALAR_QDMULL_LN>; // Signed Saturating Doubling Multiply-Add Long (scalar by element) -def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "rrsdi", "SsSi">; -def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "rrsji", "SsSi">; +def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "(1>)(1>)1.I", "SsSi">; +def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "(1>)(1>)1QI", "SsSi">; // Signed Saturating Doubling Multiply-Subtract Long (scalar by element) -def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "rrsdi", "SsSi">; -def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "rrsji", 
"SsSi">; +def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "(1>)(1>)1.I", "SsSi">; +def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "(1>)(1>)1QI", "SsSi">; // Scalar Integer Saturating Doubling Multiply Half High (scalar by element) -def SCALAR_SQDMULH_LANE : SOpInst<"vqdmulh_lane", "ssdi", "SsSi", OP_SCALAR_QDMULH_LN>; -def SCALAR_SQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QDMULH_LN>; +def SCALAR_SQDMULH_LANE : SOpInst<"vqdmulh_lane", "11.I", "SsSi", OP_SCALAR_QDMULH_LN>; +def SCALAR_SQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "11QI", "SsSi", OP_SCALAR_QDMULH_LN>; // Scalar Integer Saturating Rounding Doubling Multiply Half High -def SCALAR_SQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "ssdi", "SsSi", OP_SCALAR_QRDMULH_LN>; -def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QRDMULH_LN>; +def SCALAR_SQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "11.I", "SsSi", OP_SCALAR_QRDMULH_LN>; +def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "11QI", "SsSi", OP_SCALAR_QRDMULH_LN>; let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in { // Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half -def SCALAR_SQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "sssdi", "SsSi", OP_SCALAR_QRDMLAH_LN>; -def SCALAR_SQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "sssji", "SsSi", OP_SCALAR_QRDMLAH_LN>; +def SCALAR_SQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "111.I", "SsSi", OP_SCALAR_QRDMLAH_LN>; +def SCALAR_SQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLAH_LN>; // Signed Saturating Rounding Doubling Multiply Subtract Returning High Half -def SCALAR_SQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "sssdi", "SsSi", OP_SCALAR_QRDMLSH_LN>; -def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "sssji", "SsSi", OP_SCALAR_QRDMLSH_LN>; +def SCALAR_SQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "111.I", "SsSi", OP_SCALAR_QRDMLSH_LN>; +def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "111QI", "SsSi", 
OP_SCALAR_QRDMLSH_LN>; } -def SCALAR_VDUP_LANE : IInst<"vdup_lane", "sdi", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">; -def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "sji", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">; +def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">; +def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">; } // ARMv8.2-A FP16 vector intrinsics for A32/A64. @@ -1441,234 +1442,234 @@ let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in { // ARMv8.2-A FP16 one-operand vector intrinsics. // Comparison - def CMEQH : SInst<"vceqz", "ud", "hQh">; - def CMGEH : SInst<"vcgez", "ud", "hQh">; - def CMGTH : SInst<"vcgtz", "ud", "hQh">; - def CMLEH : SInst<"vclez", "ud", "hQh">; - def CMLTH : SInst<"vcltz", "ud", "hQh">; + def CMEQH : SInst<"vceqz", "U.", "hQh">; + def CMGEH : SInst<"vcgez", "U.", "hQh">; + def CMGTH : SInst<"vcgtz", "U.", "hQh">; + def CMLEH : SInst<"vclez", "U.", "hQh">; + def CMLTH : SInst<"vcltz", "U.", "hQh">; // Vector conversion - def VCVT_F16 : SInst<"vcvt_f16", "Hd", "sUsQsQUs">; - def VCVT_S16 : SInst<"vcvt_s16", "xd", "hQh">; - def VCVT_U16 : SInst<"vcvt_u16", "ud", "hQh">; - def VCVTA_S16 : SInst<"vcvta_s16", "xd", "hQh">; - def VCVTA_U16 : SInst<"vcvta_u16", "ud", "hQh">; - def VCVTM_S16 : SInst<"vcvtm_s16", "xd", "hQh">; - def VCVTM_U16 : SInst<"vcvtm_u16", "ud", "hQh">; - def VCVTN_S16 : SInst<"vcvtn_s16", "xd", "hQh">; - def VCVTN_U16 : SInst<"vcvtn_u16", "ud", "hQh">; - def VCVTP_S16 : SInst<"vcvtp_s16", "xd", "hQh">; - def VCVTP_U16 : SInst<"vcvtp_u16", "ud", "hQh">; + def VCVT_F16 : SInst<"vcvt_f16", "F(.!)", "sUsQsQUs">; + def VCVT_S16 : SInst<"vcvt_s16", "S.", "hQh">; + def VCVT_U16 : SInst<"vcvt_u16", "U.", "hQh">; + def VCVTA_S16 : SInst<"vcvta_s16", "S.", "hQh">; + def VCVTA_U16 : SInst<"vcvta_u16", "U.", "hQh">; + def VCVTM_S16 : SInst<"vcvtm_s16", "S.", "hQh">; + def VCVTM_U16 : SInst<"vcvtm_u16", "U.", "hQh">; + def VCVTN_S16 : SInst<"vcvtn_s16", "S.", "hQh">; 
+ def VCVTN_U16 : SInst<"vcvtn_u16", "U.", "hQh">; + def VCVTP_S16 : SInst<"vcvtp_s16", "S.", "hQh">; + def VCVTP_U16 : SInst<"vcvtp_u16", "U.", "hQh">; // Vector rounding let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_DIRECTED_ROUNDING) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in { - def FRINTZH : SInst<"vrnd", "dd", "hQh">; - def FRINTNH : SInst<"vrndn", "dd", "hQh">; - def FRINTAH : SInst<"vrnda", "dd", "hQh">; - def FRINTPH : SInst<"vrndp", "dd", "hQh">; - def FRINTMH : SInst<"vrndm", "dd", "hQh">; - def FRINTXH : SInst<"vrndx", "dd", "hQh">; + def FRINTZH : SInst<"vrnd", "..", "hQh">; + def FRINTNH : SInst<"vrndn", "..", "hQh">; + def FRINTAH : SInst<"vrnda", "..", "hQh">; + def FRINTPH : SInst<"vrndp", "..", "hQh">; + def FRINTMH : SInst<"vrndm", "..", "hQh">; + def FRINTXH : SInst<"vrndx", "..", "hQh">; } // Misc. - def VABSH : SInst<"vabs", "dd", "hQh">; - def VNEGH : SOpInst<"vneg", "dd", "hQh", OP_NEG>; - def VRECPEH : SInst<"vrecpe", "dd", "hQh">; - def FRSQRTEH : SInst<"vrsqrte", "dd", "hQh">; + def VABSH : SInst<"vabs", "..", "hQh">; + def VNEGH : SOpInst<"vneg", "..", "hQh", OP_NEG>; + def VRECPEH : SInst<"vrecpe", "..", "hQh">; + def FRSQRTEH : SInst<"vrsqrte", "..", "hQh">; // ARMv8.2-A FP16 two-operands vector intrinsics. // Misc. 
- def VADDH : SOpInst<"vadd", "ddd", "hQh", OP_ADD>; - def VABDH : SInst<"vabd", "ddd", "hQh">; - def VSUBH : SOpInst<"vsub", "ddd", "hQh", OP_SUB>; + def VADDH : SOpInst<"vadd", "...", "hQh", OP_ADD>; + def VABDH : SInst<"vabd", "...", "hQh">; + def VSUBH : SOpInst<"vsub", "...", "hQh", OP_SUB>; // Comparison let InstName = "vacge" in { - def VCAGEH : SInst<"vcage", "udd", "hQh">; - def VCALEH : SInst<"vcale", "udd", "hQh">; + def VCAGEH : SInst<"vcage", "U..", "hQh">; + def VCALEH : SInst<"vcale", "U..", "hQh">; } let InstName = "vacgt" in { - def VCAGTH : SInst<"vcagt", "udd", "hQh">; - def VCALTH : SInst<"vcalt", "udd", "hQh">; + def VCAGTH : SInst<"vcagt", "U..", "hQh">; + def VCALTH : SInst<"vcalt", "U..", "hQh">; } - def VCEQH : SOpInst<"vceq", "udd", "hQh", OP_EQ>; - def VCGEH : SOpInst<"vcge", "udd", "hQh", OP_GE>; - def VCGTH : SOpInst<"vcgt", "udd", "hQh", OP_GT>; + def VCEQH : SOpInst<"vceq", "U..", "hQh", OP_EQ>; + def VCGEH : SOpInst<"vcge", "U..", "hQh", OP_GE>; + def VCGTH : SOpInst<"vcgt", "U..", "hQh", OP_GT>; let InstName = "vcge" in - def VCLEH : SOpInst<"vcle", "udd", "hQh", OP_LE>; + def VCLEH : SOpInst<"vcle", "U..", "hQh", OP_LE>; let InstName = "vcgt" in - def VCLTH : SOpInst<"vclt", "udd", "hQh", OP_LT>; + def VCLTH : SOpInst<"vclt", "U..", "hQh", OP_LT>; // Vector conversion let isVCVT_N = 1 in { - def VCVT_N_F16 : SInst<"vcvt_n_f16", "Hdi", "sUsQsQUs">; - def VCVT_N_S16 : SInst<"vcvt_n_s16", "xdi", "hQh">; - def VCVT_N_U16 : SInst<"vcvt_n_u16", "udi", "hQh">; + def VCVT_N_F16 : SInst<"vcvt_n_f16", "F(.!)I", "sUsQsQUs">; + def VCVT_N_S16 : SInst<"vcvt_n_s16", "S.I", "hQh">; + def VCVT_N_U16 : SInst<"vcvt_n_u16", "U.I", "hQh">; } // Max/Min - def VMAXH : SInst<"vmax", "ddd", "hQh">; - def VMINH : SInst<"vmin", "ddd", "hQh">; + def VMAXH : SInst<"vmax", "...", "hQh">; + def VMINH : SInst<"vmin", "...", "hQh">; let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_NUMERIC_MAXMIN) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in { - 
def FMAXNMH : SInst<"vmaxnm", "ddd", "hQh">; - def FMINNMH : SInst<"vminnm", "ddd", "hQh">; + def FMAXNMH : SInst<"vmaxnm", "...", "hQh">; + def FMINNMH : SInst<"vminnm", "...", "hQh">; } // Multiplication/Division - def VMULH : SOpInst<"vmul", "ddd", "hQh", OP_MUL>; + def VMULH : SOpInst<"vmul", "...", "hQh", OP_MUL>; // Pairwise addition - def VPADDH : SInst<"vpadd", "ddd", "h">; + def VPADDH : SInst<"vpadd", "...", "h">; // Pairwise Max/Min - def VPMAXH : SInst<"vpmax", "ddd", "h">; - def VPMINH : SInst<"vpmin", "ddd", "h">; + def VPMAXH : SInst<"vpmax", "...", "h">; + def VPMINH : SInst<"vpmin", "...", "h">; // Reciprocal/Sqrt - def VRECPSH : SInst<"vrecps", "ddd", "hQh">; - def VRSQRTSH : SInst<"vrsqrts", "ddd", "hQh">; + def VRECPSH : SInst<"vrecps", "...", "hQh">; + def VRSQRTSH : SInst<"vrsqrts", "...", "hQh">; // ARMv8.2-A FP16 three-operands vector intrinsics. // Vector fused multiply-add operations - def VFMAH : SInst<"vfma", "dddd", "hQh">; - def VFMSH : SOpInst<"vfms", "dddd", "hQh", OP_FMLS>; + def VFMAH : SInst<"vfma", "....", "hQh">; + def VFMSH : SOpInst<"vfms", "....", "hQh", OP_FMLS>; // ARMv8.2-A FP16 lane vector intrinsics. // Mul lane - def VMUL_LANEH : IOpInst<"vmul_lane", "ddgi", "hQh", OP_MUL_LN>; - def VMUL_NH : IOpInst<"vmul_n", "dds", "hQh", OP_MUL_N>; + def VMUL_LANEH : IOpInst<"vmul_lane", "..qI", "hQh", OP_MUL_LN>; + def VMUL_NH : IOpInst<"vmul_n", "..1", "hQh", OP_MUL_N>; // Data processing intrinsics - section 5 // Logical operations let isHiddenLInst = 1 in - def VBSLH : SInst<"vbsl", "dudd", "hQh">; + def VBSLH : SInst<"vbsl", ".U..", "hQh">; // Transposition operations - def VZIPH : WInst<"vzip", "2dd", "hQh">; - def VUZPH : WInst<"vuzp", "2dd", "hQh">; - def VTRNH : WInst<"vtrn", "2dd", "hQh">; + def VZIPH : WInst<"vzip", "2..", "hQh">; + def VUZPH : WInst<"vuzp", "2..", "hQh">; + def VTRNH : WInst<"vtrn", "2..", "hQh">; let ArchGuard = "!defined(__aarch64__)" in { // Set all lanes to same value. 
// Already implemented prior to ARMv8.2-A. - def VMOV_NH : WOpInst<"vmov_n", "ds", "hQh", OP_DUP>; - def VDUP_NH : WOpInst<"vdup_n", "ds", "hQh", OP_DUP>; - def VDUP_LANE1H : WOpInst<"vdup_lane", "dgi", "hQh", OP_DUP_LN>; + def VMOV_NH : WOpInst<"vmov_n", ".1", "hQh", OP_DUP>; + def VDUP_NH : WOpInst<"vdup_n", ".1", "hQh", OP_DUP>; + def VDUP_LANE1H : WOpInst<"vdup_lane", ".qI", "hQh", OP_DUP_LN>; } // Vector Extract - def VEXTH : WInst<"vext", "dddi", "hQh">; + def VEXTH : WInst<"vext", "...I", "hQh">; // Reverse vector elements - def VREV64H : WOpInst<"vrev64", "dd", "hQh", OP_REV64>; + def VREV64H : WOpInst<"vrev64", "..", "hQh", OP_REV64>; } // ARMv8.2-A FP16 vector intrinsics for A64 only. let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(__aarch64__)" in { // Vector rounding - def FRINTIH : SInst<"vrndi", "dd", "hQh">; + def FRINTIH : SInst<"vrndi", "..", "hQh">; // Misc. - def FSQRTH : SInst<"vsqrt", "dd", "hQh">; + def FSQRTH : SInst<"vsqrt", "..", "hQh">; // Multiplication/Division - def MULXH : SInst<"vmulx", "ddd", "hQh">; - def FDIVH : IOpInst<"vdiv", "ddd", "hQh", OP_DIV>; + def MULXH : SInst<"vmulx", "...", "hQh">; + def FDIVH : IOpInst<"vdiv", "...", "hQh", OP_DIV>; // Pairwise addition - def VPADDH1 : SInst<"vpadd", "ddd", "Qh">; + def VPADDH1 : SInst<"vpadd", "...", "Qh">; // Pairwise Max/Min - def VPMAXH1 : SInst<"vpmax", "ddd", "Qh">; - def VPMINH1 : SInst<"vpmin", "ddd", "Qh">; + def VPMAXH1 : SInst<"vpmax", "...", "Qh">; + def VPMINH1 : SInst<"vpmin", "...", "Qh">; // Pairwise MaxNum/MinNum - def FMAXNMPH : SInst<"vpmaxnm", "ddd", "hQh">; - def FMINNMPH : SInst<"vpminnm", "ddd", "hQh">; + def FMAXNMPH : SInst<"vpmaxnm", "...", "hQh">; + def FMINNMPH : SInst<"vpminnm", "...", "hQh">; // ARMv8.2-A FP16 lane vector intrinsics. 
// FMA lane - def VFMA_LANEH : IInst<"vfma_lane", "dddgi", "hQh">; - def VFMA_LANEQH : IInst<"vfma_laneq", "dddji", "hQh">; + def VFMA_LANEH : IInst<"vfma_lane", "...qI", "hQh">; + def VFMA_LANEQH : IInst<"vfma_laneq", "...QI", "hQh">; // FMA lane with scalar argument - def FMLA_NH : SOpInst<"vfma_n", "ddds", "hQh", OP_FMLA_N>; + def FMLA_NH : SOpInst<"vfma_n", "...1", "hQh", OP_FMLA_N>; // Scalar floating point fused multiply-add (scalar, by element) - def SCALAR_FMLA_LANEH : IInst<"vfma_lane", "sssdi", "Sh">; - def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "sssji", "Sh">; + def SCALAR_FMLA_LANEH : IInst<"vfma_lane", "111.I", "Sh">; + def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "111QI", "Sh">; // FMS lane - def VFMS_LANEH : IOpInst<"vfms_lane", "dddgi", "hQh", OP_FMS_LN>; - def VFMS_LANEQH : IOpInst<"vfms_laneq", "dddji", "hQh", OP_FMS_LNQ>; + def VFMS_LANEH : IOpInst<"vfms_lane", "...qI", "hQh", OP_FMS_LN>; + def VFMS_LANEQH : IOpInst<"vfms_laneq", "...QI", "hQh", OP_FMS_LNQ>; // FMS lane with scalar argument - def FMLS_NH : SOpInst<"vfms_n", "ddds", "hQh", OP_FMLS_N>; + def FMLS_NH : SOpInst<"vfms_n", "...1", "hQh", OP_FMLS_N>; // Scalar floating foint fused multiply-subtract (scalar, by element) - def SCALAR_FMLS_LANEH : IOpInst<"vfms_lane", "sssdi", "Sh", OP_FMS_LN>; - def SCALAR_FMLS_LANEQH : IOpInst<"vfms_laneq", "sssji", "Sh", OP_FMS_LNQ>; + def SCALAR_FMLS_LANEH : IOpInst<"vfms_lane", "111.I", "Sh", OP_FMS_LN>; + def SCALAR_FMLS_LANEQH : IOpInst<"vfms_laneq", "111QI", "Sh", OP_FMS_LNQ>; // Mul lane - def VMUL_LANEQH : IOpInst<"vmul_laneq", "ddji", "hQh", OP_MUL_LN>; + def VMUL_LANEQH : IOpInst<"vmul_laneq", "..QI", "hQh", OP_MUL_LN>; // Scalar floating point multiply (scalar, by element) - def SCALAR_FMUL_LANEH : IOpInst<"vmul_lane", "ssdi", "Sh", OP_SCALAR_MUL_LN>; - def SCALAR_FMUL_LANEQH : IOpInst<"vmul_laneq", "ssji", "Sh", OP_SCALAR_MUL_LN>; + def SCALAR_FMUL_LANEH : IOpInst<"vmul_lane", "11.I", "Sh", OP_SCALAR_MUL_LN>; + def SCALAR_FMUL_LANEQH : 
IOpInst<"vmul_laneq", "11QI", "Sh", OP_SCALAR_MUL_LN>; // Mulx lane - def VMULX_LANEH : IOpInst<"vmulx_lane", "ddgi", "hQh", OP_MULX_LN>; - def VMULX_LANEQH : IOpInst<"vmulx_laneq", "ddji", "hQh", OP_MULX_LN>; - def VMULX_NH : IOpInst<"vmulx_n", "dds", "hQh", OP_MULX_N>; + def VMULX_LANEH : IOpInst<"vmulx_lane", "..qI", "hQh", OP_MULX_LN>; + def VMULX_LANEQH : IOpInst<"vmulx_laneq", "..QI", "hQh", OP_MULX_LN>; + def VMULX_NH : IOpInst<"vmulx_n", "..1", "hQh", OP_MULX_N>; // Scalar floating point mulx (scalar, by element) - def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "ssdi", "Sh">; - def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "ssji", "Sh">; + def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "11.I", "Sh">; + def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "11QI", "Sh">; // ARMv8.2-A FP16 reduction vector intrinsics. - def VMAXVH : SInst<"vmaxv", "sd", "hQh">; - def VMINVH : SInst<"vminv", "sd", "hQh">; - def FMAXNMVH : SInst<"vmaxnmv", "sd", "hQh">; - def FMINNMVH : SInst<"vminnmv", "sd", "hQh">; + def VMAXVH : SInst<"vmaxv", "1.", "hQh">; + def VMINVH : SInst<"vminv", "1.", "hQh">; + def FMAXNMVH : SInst<"vmaxnmv", "1.", "hQh">; + def FMINNMVH : SInst<"vminnmv", "1.", "hQh">; // Permutation - def VTRN1H : SOpInst<"vtrn1", "ddd", "hQh", OP_TRN1>; - def VZIP1H : SOpInst<"vzip1", "ddd", "hQh", OP_ZIP1>; - def VUZP1H : SOpInst<"vuzp1", "ddd", "hQh", OP_UZP1>; - def VTRN2H : SOpInst<"vtrn2", "ddd", "hQh", OP_TRN2>; - def VZIP2H : SOpInst<"vzip2", "ddd", "hQh", OP_ZIP2>; - def VUZP2H : SOpInst<"vuzp2", "ddd", "hQh", OP_UZP2>; - - def SCALAR_VDUP_LANEH : IInst<"vdup_lane", "sdi", "Sh">; - def SCALAR_VDUP_LANEQH : IInst<"vdup_laneq", "sji", "Sh">; + def VTRN1H : SOpInst<"vtrn1", "...", "hQh", OP_TRN1>; + def VZIP1H : SOpInst<"vzip1", "...", "hQh", OP_ZIP1>; + def VUZP1H : SOpInst<"vuzp1", "...", "hQh", OP_UZP1>; + def VTRN2H : SOpInst<"vtrn2", "...", "hQh", OP_TRN2>; + def VZIP2H : SOpInst<"vzip2", "...", "hQh", OP_ZIP2>; + def VUZP2H : SOpInst<"vuzp2", "...", "hQh", 
OP_UZP2>; + + def SCALAR_VDUP_LANEH : IInst<"vdup_lane", "1.I", "Sh">; + def SCALAR_VDUP_LANEQH : IInst<"vdup_laneq", "1QI", "Sh">; } // v8.2-A dot product instructions. let ArchGuard = "defined(__ARM_FEATURE_DOTPROD)" in { - def DOT : SInst<"vdot", "dd88", "iQiUiQUi">; - def DOT_LANE : SOpInst<"vdot_lane", "dd87i", "iUiQiQUi", OP_DOT_LN>; + def DOT : SInst<"vdot", "..(<<)(<<)", "iQiUiQUi">; + def DOT_LANE : SOpInst<"vdot_lane", "..(<<)(<; } let ArchGuard = "defined(__ARM_FEATURE_DOTPROD) && defined(__aarch64__)" in { // Variants indexing into a 128-bit vector are A64 only. - def UDOT_LANEQ : SOpInst<"vdot_laneq", "dd89i", "iUiQiQUi", OP_DOT_LNQ>; + def UDOT_LANEQ : SOpInst<"vdot_laneq", "..(<<)(<; } // v8.2-A FP16 fused multiply-add long instructions. let ArchGuard = "defined(__ARM_FEATURE_FP16FML) && defined(__aarch64__)" in { - def VFMLAL_LOW : SInst<"vfmlal_low", "nndd", "hQh">; - def VFMLSL_LOW : SInst<"vfmlsl_low", "nndd", "hQh">; - def VFMLAL_HIGH : SInst<"vfmlal_high", "nndd", "hQh">; - def VFMLSL_HIGH : SInst<"vfmlsl_high", "nndd", "hQh">; - - def VFMLAL_LANE_LOW : SOpInst<"vfmlal_lane_low", "ffH0i", "hQh", OP_FMLAL_LN>; - def VFMLSL_LANE_LOW : SOpInst<"vfmlsl_lane_low", "ffH0i", "hQh", OP_FMLSL_LN>; - def VFMLAL_LANE_HIGH : SOpInst<"vfmlal_lane_high", "ffH0i", "hQh", OP_FMLAL_LN_Hi>; - def VFMLSL_LANE_HIGH : SOpInst<"vfmlsl_lane_high", "ffH0i", "hQh", OP_FMLSL_LN_Hi>; - - def VFMLAL_LANEQ_LOW : SOpInst<"vfmlal_laneq_low", "ffH1i", "hQh", OP_FMLAL_LN>; - def VFMLSL_LANEQ_LOW : SOpInst<"vfmlsl_laneq_low", "ffH1i", "hQh", OP_FMLSL_LN>; - def VFMLAL_LANEQ_HIGH : SOpInst<"vfmlal_laneq_high", "ffH1i", "hQh", OP_FMLAL_LN_Hi>; - def VFMLSL_LANEQ_HIGH : SOpInst<"vfmlsl_laneq_high", "ffH1i", "hQh", OP_FMLSL_LN_Hi>; + def VFMLAL_LOW : SInst<"vfmlal_low", ">>..", "hQh">; + def VFMLSL_LOW : SInst<"vfmlsl_low", ">>..", "hQh">; + def VFMLAL_HIGH : SInst<"vfmlal_high", ">>..", "hQh">; + def VFMLSL_HIGH : SInst<"vfmlsl_high", ">>..", "hQh">; + + def VFMLAL_LANE_LOW : 
SOpInst<"vfmlal_lane_low", "(F>)(F>)F(Fq)I", "hQh", OP_FMLAL_LN>; + def VFMLSL_LANE_LOW : SOpInst<"vfmlsl_lane_low", "(F>)(F>)F(Fq)I", "hQh", OP_FMLSL_LN>; + def VFMLAL_LANE_HIGH : SOpInst<"vfmlal_lane_high", "(F>)(F>)F(Fq)I", "hQh", OP_FMLAL_LN_Hi>; + def VFMLSL_LANE_HIGH : SOpInst<"vfmlsl_lane_high", "(F>)(F>)F(Fq)I", "hQh", OP_FMLSL_LN_Hi>; + + def VFMLAL_LANEQ_LOW : SOpInst<"vfmlal_laneq_low", "(F>)(F>)F(FQ)I", "hQh", OP_FMLAL_LN>; + def VFMLSL_LANEQ_LOW : SOpInst<"vfmlsl_laneq_low", "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN>; + def VFMLAL_LANEQ_HIGH : SOpInst<"vfmlal_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLAL_LN_Hi>; + def VFMLSL_LANEQ_HIGH : SOpInst<"vfmlsl_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN_Hi>; } diff --git a/clang/include/clang/Basic/arm_neon_incl.td b/clang/include/clang/Basic/arm_neon_incl.td index 984ed787037f6..28b00d162a00d 100644 --- a/clang/include/clang/Basic/arm_neon_incl.td +++ b/clang/include/clang/Basic/arm_neon_incl.td @@ -198,10 +198,8 @@ def OP_UNAVAILABLE : Operation { // // The prototype is a string that defines the return type of the intrinsic // and the type of each argument. The return type and every argument gets a -// "modifier" that can change in some way the "base type" of the intrinsic. -// -// The modifier 'd' means "default" and does not modify the base type in any -// way. The available modifiers are given below. +// set of "modifiers" that can change in some way the "base type" of the +// intrinsic. // // Typespecs // --------- @@ -226,41 +224,34 @@ def OP_UNAVAILABLE : Operation { // ------------------- // prototype: return (arg, arg, ...) // -// v: void -// t: best-fit integer (int/poly args) -// x: signed integer (int/float args) -// u: unsigned integer (int/float args) -// f: float (int args) -// F: double (int args) -// H: half (int args) -// 0: half (int args), ignore 'Q' size modifier. -// 1: half (int args), force 'Q' size modifier. -// d: default -// g: default, ignore 'Q' size modifier. 
-// j: default, force 'Q' size modifier. -// w: double width elements, same num elts -// n: double width elements, half num elts -// h: half width elements, double num elts -// q: half width elements, quad num elts -// e: half width elements, double num elts, unsigned -// m: half width elements, same num elts -// i: constant int -// l: constant uint64 -// s: scalar of element type -// z: scalar of half width element type, signed -// r: scalar of double width element type, signed -// b: scalar of unsigned integer/long type (int/float args) -// $: scalar of signed integer/long type (int/float args) -// y: scalar of float -// o: scalar of double -// k: default elt width, double num elts -// 2,3,4: array of default vectors -// B,C,D: array of default elts, force 'Q' size modifier. -// p: pointer type -// c: const pointer type -// 7: vector of 8-bit elements, ignore 'Q' size modifier -// 8: vector of 8-bit elements, same width as default type -// 9: vector of 8-bit elements, force 'Q' size modifier +// Each type modifier is either a single character, or a group surrounded by +// parentheses. +// +// .: default +// v: change to void category. +// S: change to signed integer category. +// U: change to unsigned integer category. +// F: change to floating category. +// P: change to polynomial category. +// p: change polynomial to equivalent integer category. Otherwise nop. +// +// >: double element width (vector size unchanged). +// <: half element width (vector size unchanged). +// +// 1: change to scalar. +// 2: change to struct of two vectors. +// 3: change to struct of three vectors. +// 4: change to struct of four vectors. +// +// *: make a pointer argument. +// c: make a constant argument (for pointers). +// +// Q: force 128-bit width. +// q: force 64-bit width. +// +// I: make 32-bit signed scalar immediate +// !: make this the key type passed to CGBuiltin.cpp in a polymorphic call. + // Every intrinsic subclasses Inst. 
class Inst { diff --git a/clang/test/CodeGen/aarch64-neon-intrinsics.c b/clang/test/CodeGen/aarch64-neon-intrinsics.c index b29d877dd8eca..7744b4f4a159d 100644 --- a/clang/test/CodeGen/aarch64-neon-intrinsics.c +++ b/clang/test/CodeGen/aarch64-neon-intrinsics.c @@ -17756,8 +17756,6 @@ float32_t test_vminnmv_f32(float32x2_t a) { } // CHECK-LABEL: @test_vpaddq_s64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VPADDQ_V2_I]] @@ -17766,8 +17764,6 @@ int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vpaddq_u64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VPADDQ_V2_I]] diff --git a/clang/test/CodeGen/arm_neon_intrinsics.c b/clang/test/CodeGen/arm_neon_intrinsics.c index 5c34d1c37de09..0ec1198f99016 100644 --- a/clang/test/CodeGen/arm_neon_intrinsics.c +++ b/clang/test/CodeGen/arm_neon_intrinsics.c @@ -2147,6 +2147,13 @@ int8x8_t test_vcreate_s8(uint64_t a) { return vclz_s8(vcreate_s8(a)); } +// CHECK-LABEL: @test_vcreate_imm +// CHECK: [[RES:%.*]] = bitcast i64 0 to <4 x i16> +// CHECK: ret <4 x i16> [[RES]] +int16x4_t test_vcreate_imm(void) { + return vcreate_s16(0); +} + // CHECK-LABEL: @test_vcreate_s16( // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp index cdf761b00c61c..a0f3fb2ddc089 100644 --- 
a/clang/utils/TableGen/NeonEmitter.cpp +++ b/clang/utils/TableGen/NeonEmitter.cpp @@ -161,11 +161,11 @@ class Type { Pointer(false), ScalarForMangling(false), NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {} - Type(TypeSpec TS, char CharMod) + Type(TypeSpec TS, StringRef CharMods) : TS(std::move(TS)), Kind(Void), Immediate(false), Constant(false), Pointer(false), ScalarForMangling(false), NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) { - applyModifier(CharMod); + applyModifiers(CharMods); } /// Returns a type representing "void". @@ -181,13 +181,15 @@ class Type { bool noManglingQ() const { return NoManglingQ; } bool isPointer() const { return Pointer; } + bool isValue() const { return !isVoid() && !isPointer(); } + bool isScalar() const { return isValue() && NumVectors == 0; } + bool isVector() const { return isValue() && NumVectors > 0; } + bool isConstPointer() const { return Constant; } bool isFloating() const { return Kind == Float; } bool isInteger() const { return Kind == SInt || Kind == UInt; } bool isPoly() const { return Kind == Poly; } bool isSigned() const { return Kind == SInt; } bool isImmediate() const { return Immediate; } - bool isScalar() const { return NumVectors == 0; } - bool isVector() const { return NumVectors > 0; } bool isFloat() const { return isFloating() && ElementBitwidth == 32; } bool isDouble() const { return isFloating() && ElementBitwidth == 64; } bool isHalf() const { return isFloating() && ElementBitwidth == 16; } @@ -205,11 +207,11 @@ class Type { // Mutator functions // void makeUnsigned() { - assert(isInteger() && "not a potentially signed type"); + assert(!isVoid() && "not a potentially signed type"); Kind = UInt; } void makeSigned() { - assert(isInteger() && "not a potentially signed type"); + assert(!isVoid() && "not a potentially signed type"); Kind = SInt; } @@ -267,8 +269,8 @@ class Type { /// seen. 
This is needed by applyModifier as some modifiers /// only take effect if the type size was changed by "Q" or "H". void applyTypespec(bool &Quad); - /// Applies a prototype modifiers to the type. - void applyModifier(char Mod); + /// Applies prototype modifiers to the type. + void applyModifiers(StringRef Mods); }; //===----------------------------------------------------------------------===// @@ -299,8 +301,8 @@ class Intrinsic { /// The Record this intrinsic was created from. Record *R; - /// The unmangled name and prototype. - std::string Name, Proto; + /// The unmangled name. + std::string Name; /// The input and output typespecs. InTS == OutTS except when /// CartesianProductOfTypes is 1 - this is the case for vreinterpret. TypeSpec OutTS, InTS; @@ -323,6 +325,8 @@ class Intrinsic { /// The types of return value [0] and parameters [1..]. std::vector Types; + /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls. + int PolymorphicKeyType; /// The local variables defined. std::map Variables; /// NeededEarly - set if any other intrinsic depends on this intrinsic. @@ -358,34 +362,39 @@ class Intrinsic { Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS, TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter, StringRef Guard, bool IsUnavailable, bool BigEndianSafe) - : R(R), Name(Name.str()), Proto(Proto.str()), OutTS(OutTS), InTS(InTS), - CK(CK), Body(Body), Guard(Guard.str()), IsUnavailable(IsUnavailable), - BigEndianSafe(BigEndianSafe), NeededEarly(false), UseMacro(false), - BaseType(OutTS, 'd'), InBaseType(InTS, 'd'), Emitter(Emitter) { - // If this builtin takes an immediate argument, we need to #define it rather - // than use a standard declaration, so that SemaChecking can range check - // the immediate passed by the user. - if (Proto.find('i') != std::string::npos) - UseMacro = true; - - // Pointer arguments need to use macros to avoid hiding aligned attributes - // from the pointer type. 
- if (Proto.find('p') != std::string::npos || - Proto.find('c') != std::string::npos) - UseMacro = true; - - // It is not permitted to pass or return an __fp16 by value, so intrinsics - // taking a scalar float16_t must be implemented as macros. - if (OutTS.find('h') != std::string::npos && - Proto.find('s') != std::string::npos) - UseMacro = true; - + : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body), + Guard(Guard.str()), IsUnavailable(IsUnavailable), + BigEndianSafe(BigEndianSafe), PolymorphicKeyType(0), NeededEarly(false), + UseMacro(false), BaseType(OutTS, "."), InBaseType(InTS, "."), + Emitter(Emitter) { // Modify the TypeSpec per-argument to get a concrete Type, and create // known variables for each. // Types[0] is the return value. - Types.emplace_back(OutTS, Proto[0]); - for (unsigned I = 1; I < Proto.size(); ++I) - Types.emplace_back(InTS, Proto[I]); + unsigned Pos = 0; + Types.emplace_back(OutTS, getNextModifiers(Proto, Pos)); + StringRef Mods = getNextModifiers(Proto, Pos); + while (!Mods.empty()) { + Types.emplace_back(InTS, Mods); + if (Mods.find("!") != StringRef::npos) + PolymorphicKeyType = Types.size() - 1; + + Mods = getNextModifiers(Proto, Pos); + } + + for (auto Type : Types) { + // If this builtin takes an immediate argument, we need to #define it rather + // than use a standard declaration, so that SemaChecking can range check + // the immediate passed by the user. + + // Pointer arguments need to use macros to avoid hiding aligned attributes + // from the pointer type. + + // It is not permitted to pass or return an __fp16 by value, so intrinsics + // taking a scalar float16_t must be implemented as macros. + if (Type.isImmediate() || Type.isPointer() || + (Type.isScalar() && Type.isHalf())) + UseMacro = true; + } } /// Get the Record that this intrinsic is based off. @@ -401,23 +410,24 @@ class Intrinsic { /// Return true if the intrinsic takes an immediate operand. 
bool hasImmediate() const { - return Proto.find('i') != std::string::npos; + return std::any_of(Types.begin(), Types.end(), + [](const Type &T) { return T.isImmediate(); }); } /// Return the parameter index of the immediate operand. unsigned getImmediateIdx() const { - assert(hasImmediate()); - unsigned Idx = Proto.find('i'); - assert(Idx > 0 && "Can't return an immediate!"); - return Idx - 1; + for (unsigned Idx = 0; Idx < Types.size(); ++Idx) + if (Types[Idx].isImmediate()) + return Idx - 1; + llvm_unreachable("Intrinsic has no immediate"); } - unsigned getNumParams() const { return Proto.size() - 1; } + + unsigned getNumParams() const { return Types.size() - 1; } Type getReturnType() const { return Types[0]; } Type getParamType(unsigned I) const { return Types[I + 1]; } Type getBaseType() const { return BaseType; } - /// Return the raw prototype string. - std::string getProto() const { return Proto; } + Type getPolymorphicKeyType() const { return Types[PolymorphicKeyType]; } /// Return true if the prototype has a scalar argument. bool protoHasScalar() const; @@ -471,6 +481,8 @@ class Intrinsic { void indexBody(); private: + StringRef getNextModifiers(StringRef Proto, unsigned &Pos) const; + std::string mangleName(std::string Name, ClassKind CK) const; void initVariables(); @@ -614,10 +626,14 @@ std::string Type::builtin_str() const { if (isVoid()) return "v"; - if (Pointer) + if (isPointer()) { // All pointers are void pointers. - S += "v"; - else if (isInteger()) + S = "v"; + if (isConstPointer()) + S += "C"; + S += "*"; + return S; + } else if (isInteger()) switch (ElementBitwidth) { case 8: S += "c"; break; case 16: S += "s"; break; @@ -634,10 +650,11 @@ std::string Type::builtin_str() const { default: llvm_unreachable("Unhandled case!"); } + // FIXME: NECESSARY??????????????????????????????????????????????????????????????????????? if (isChar() && !isPointer() && isSigned()) // Make chars explicitly signed. 
S = "S" + S; - else if (!isPointer() && isInteger() && !isSigned()) + else if (isInteger() && !isSigned()) S = "U" + S; // Constant indices are "int", but have the "constant expression" modifier. @@ -646,11 +663,8 @@ std::string Type::builtin_str() const { S = "I" + S; } - if (isScalar()) { - if (Constant) S += "C"; - if (Pointer) S += "*"; + if (isScalar()) return S; - } std::string Ret; for (unsigned I = 0; I < NumVectors; ++I) @@ -812,202 +826,77 @@ void Type::applyTypespec(bool &Quad) { Bitwidth = Quad ? 128 : 64; } -void Type::applyModifier(char Mod) { +void Type::applyModifiers(StringRef Mods) { bool AppliedQuad = false; applyTypespec(AppliedQuad); - switch (Mod) { - case 'v': - Kind = Void; - break; - case 't': - if (isPoly()) + for (char Mod : Mods) { + switch (Mod) { + case '.': + break; + case 'v': + Kind = Void; + break; + case 'S': + Kind = SInt; + break; + case 'U': Kind = UInt; - break; - case 'b': - Kind = UInt; - NumVectors = 0; - Bitwidth = ElementBitwidth; - break; - case '$': - Kind = SInt; - NumVectors = 0; - Bitwidth = ElementBitwidth; - break; - case 'u': - Kind = UInt; - break; - case 'x': - assert(!isPoly() && "'u' can't be used with poly types!"); - Kind = SInt; - break; - case 'o': - Bitwidth = ElementBitwidth = 64; - NumVectors = 0; - Kind = Float; - break; - case 'y': - Bitwidth = ElementBitwidth = 32; - NumVectors = 0; - Kind = Float; - break; - case 'Y': - Bitwidth = ElementBitwidth = 16; - NumVectors = 0; - Kind = Float; - break; - case 'I': - Bitwidth = ElementBitwidth = 32; - NumVectors = 0; - Kind = SInt; - break; - case 'L': - Bitwidth = ElementBitwidth = 64; - NumVectors = 0; - Kind = SInt; - break; - case 'U': - Bitwidth = ElementBitwidth = 32; - NumVectors = 0; - Kind = UInt; - break; - case 'O': - Bitwidth = ElementBitwidth = 64; - NumVectors = 0; - Kind = UInt; - break; - case 'f': - Kind = Float; - ElementBitwidth = 32; - break; - case 'F': - Kind = Float; - ElementBitwidth = 64; - break; - case 'H': - Kind = Float; - 
ElementBitwidth = 16; - break; - case '0': - Kind = Float; - if (AppliedQuad) - Bitwidth /= 2; - ElementBitwidth = 16; - break; - case '1': - Kind = Float; - if (!AppliedQuad) - Bitwidth *= 2; - ElementBitwidth = 16; - break; - case 'g': - if (AppliedQuad) - Bitwidth /= 2; - break; - case 'j': - if (!AppliedQuad) - Bitwidth *= 2; - break; - case 'w': - ElementBitwidth *= 2; - Bitwidth *= 2; - break; - case 'n': - ElementBitwidth *= 2; - break; - case 'i': - Kind = SInt; - ElementBitwidth = Bitwidth = 32; - NumVectors = 0; - Immediate = true; - break; - case 'l': - Kind = UInt; - ElementBitwidth = Bitwidth = 64; - NumVectors = 0; - Immediate = true; - break; - case 'z': - ElementBitwidth /= 2; - Bitwidth = ElementBitwidth; - NumVectors = 0; - break; - case 'r': - ElementBitwidth *= 2; - Bitwidth = ElementBitwidth; - NumVectors = 0; - break; - case 's': - Bitwidth = ElementBitwidth; - NumVectors = 0; - break; - case 'k': - Bitwidth *= 2; - break; - case 'c': - Constant = true; - LLVM_FALLTHROUGH; - case 'p': - Pointer = true; - Bitwidth = ElementBitwidth; - NumVectors = 0; - break; - case 'h': - ElementBitwidth /= 2; - break; - case 'q': - ElementBitwidth /= 2; - Bitwidth *= 2; - break; - case 'e': - ElementBitwidth /= 2; - Kind = UInt; - break; - case 'm': - ElementBitwidth /= 2; - Bitwidth /= 2; - break; - case 'd': - break; - case '2': - NumVectors = 2; - break; - case '3': - NumVectors = 3; - break; - case '4': - NumVectors = 4; - break; - case 'B': - NumVectors = 2; - if (!AppliedQuad) - Bitwidth *= 2; - break; - case 'C': - NumVectors = 3; - if (!AppliedQuad) - Bitwidth *= 2; - break; - case 'D': - NumVectors = 4; - if (!AppliedQuad) - Bitwidth *= 2; - break; - case '7': - if (AppliedQuad) - Bitwidth /= 2; - ElementBitwidth = 8; - break; - case '8': - ElementBitwidth = 8; - break; - case '9': - if (!AppliedQuad) - Bitwidth *= 2; - ElementBitwidth = 8; - break; - default: - llvm_unreachable("Unhandled character!"); + break; + case 'F': + Kind = Float; + break; + 
case 'P': + Kind = Poly; + break; + case '>': + assert(ElementBitwidth < 128); + ElementBitwidth *= 2; + break; + case '<': + assert(ElementBitwidth > 8); + ElementBitwidth /= 2; + break; + case '1': + NumVectors = 0; + break; + case '2': + NumVectors = 2; + break; + case '3': + NumVectors = 3; + break; + case '4': + NumVectors = 4; + break; + case '*': + Pointer = true; + break; + case 'c': + Constant = true; + break; + case 'Q': + Bitwidth = 128; + break; + case 'q': + Bitwidth = 64; + break; + case 'I': + Kind = SInt; + ElementBitwidth = Bitwidth = 32; + NumVectors = 0; + Immediate = true; + break; + case 'p': + if (isPoly()) + Kind = UInt; + break; + case '!': + // Key type, handled elsewhere. + break; + default: + llvm_unreachable("Unhandled character!"); + } } } @@ -1015,6 +904,19 @@ void Type::applyModifier(char Mod) { // Intrinsic implementation //===----------------------------------------------------------------------===// +StringRef Intrinsic::getNextModifiers(StringRef Proto, unsigned &Pos) const { + if (Proto.size() == Pos) + return StringRef(); + else if (Proto[Pos] != '(') + return Proto.substr(Pos++, 1); + + size_t Start = Pos + 1; + size_t End = Proto.find(')', Start); + assert_with_loc(End != StringRef::npos, "unmatched modifier group paren"); + Pos = End + 1; + return Proto.slice(Start, End); +} + std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const { char typeCode = '\0'; bool printNumber = true; @@ -1053,17 +955,13 @@ std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const { return S; } -static bool isFloatingPointProtoModifier(char Mod) { - return Mod == 'F' || Mod == 'f' || Mod == 'H' || Mod == 'Y' || Mod == 'I'; -} - std::string Intrinsic::getBuiltinTypeStr() { ClassKind LocalCK = getClassKind(true); std::string S; Type RetT = getReturnType(); if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() && - !RetT.isFloating() && !RetT.isVoid()) + !RetT.isFloating()) RetT.makeInteger(RetT.getElementSizeInBits(), 
false); // Since the return value must be one type, return a vector type of the @@ -1078,7 +976,7 @@ std::string Intrinsic::getBuiltinTypeStr() { if (!RetT.isScalar() && RetT.isInteger() && !RetT.isSigned()) RetT.makeSigned(); - if (LocalCK == ClassB && !RetT.isVoid() && !RetT.isScalar()) + if (LocalCK == ClassB && RetT.isValue() && !RetT.isScalar()) // Cast to vector of 8-bit elements. RetT.makeInteger(8, true); @@ -1194,7 +1092,7 @@ void Intrinsic::initVariables() { // Modify the TypeSpec per-argument to get a concrete Type, and create // known variables for each. - for (unsigned I = 1; I < Proto.size(); ++I) { + for (unsigned I = 1; I < Types.size(); ++I) { char NameC = '0' + (I - 1); std::string Name = "p"; Name.push_back(NameC); @@ -1315,7 +1213,7 @@ void Intrinsic::emitShadowedArgs() { for (unsigned I = 0; I < getNumParams(); ++I) { // Do not create a temporary for an immediate argument. // That would defeat the whole point of using a macro! - if (hasImmediate() && Proto[I+1] == 'i') + if (getParamType(I).isImmediate()) continue; // Do not create a temporary for pointer arguments. The input // pointer may have an alignment hint. @@ -1339,13 +1237,9 @@ void Intrinsic::emitShadowedArgs() { } bool Intrinsic::protoHasScalar() const { - return (Proto.find('s') != std::string::npos || - Proto.find('z') != std::string::npos || - Proto.find('r') != std::string::npos || - Proto.find('b') != std::string::npos || - Proto.find('$') != std::string::npos || - Proto.find('y') != std::string::npos || - Proto.find('o') != std::string::npos); + return std::any_of(Types.begin(), Types.end(), [](const Type &T) { + return T.isScalar() && !T.isImmediate(); + }); } void Intrinsic::emitBodyAsBuiltinCall() { @@ -1408,13 +1302,7 @@ void Intrinsic::emitBodyAsBuiltinCall() { // Extra constant integer to hold type class enum for this function, e.g. 
s8 if (getClassKind(true) == ClassB) { - Type ThisTy = getReturnType(); - if (Proto[0] == 'v' || isFloatingPointProtoModifier(Proto[0])) - ThisTy = getParamType(0); - if (ThisTy.isPointer()) - ThisTy = getParamType(1); - - S += utostr(ThisTy.getNeonEnum()); + S += utostr(getPolymorphicKeyType().getNeonEnum()); } else { // Remove extraneous ", ". S.pop_back(); @@ -2019,9 +1907,9 @@ void NeonEmitter::createIntrinsic(Record *R, std::vector> NewTypeSpecs; for (auto TS : TypeSpecs) { if (CartesianProductOfTypes) { - Type DefaultT(TS, 'd'); + Type DefaultT(TS, "."); for (auto SrcTS : TypeSpecs) { - Type DefaultSrcT(SrcTS, 'd'); + Type DefaultSrcT(SrcTS, "."); if (TS == SrcTS || DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits()) continue; @@ -2101,31 +1989,19 @@ void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, continue; uint64_t Mask = 0ULL; - Type Ty = Def->getReturnType(); - if (Def->getProto()[0] == 'v' || - isFloatingPointProtoModifier(Def->getProto()[0])) - Ty = Def->getParamType(0); - if (Ty.isPointer()) - Ty = Def->getParamType(1); - - Mask |= 1ULL << Ty.getNeonEnum(); + Mask |= 1ULL << Def->getPolymorphicKeyType().getNeonEnum(); // Check if the function has a pointer or const pointer argument. - std::string Proto = Def->getProto(); int PtrArgNum = -1; bool HasConstPtr = false; for (unsigned I = 0; I < Def->getNumParams(); ++I) { - char ArgType = Proto[I + 1]; - if (ArgType == 'c') { - HasConstPtr = true; + const auto &Type = Def->getParamType(I); + if (Type.isPointer()) { PtrArgNum = I; - break; - } - if (ArgType == 'p') { - PtrArgNum = I; - break; + HasConstPtr = Type.isConstPointer(); } } + // For sret builtins, adjust the pointer argument index. 
if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1) PtrArgNum += 1; @@ -2349,7 +2225,7 @@ void NeonEmitter::run(raw_ostream &OS) { bool InIfdef = false; for (auto &TS : TDTypeVec) { bool IsA64 = false; - Type T(TS, 'd'); + Type T(TS, "."); if (T.isDouble() || (T.isPoly() && T.getElementSizeInBits() == 64)) IsA64 = true; @@ -2382,7 +2258,7 @@ void NeonEmitter::run(raw_ostream &OS) { for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) { for (auto &TS : TDTypeVec) { bool IsA64 = false; - Type T(TS, 'd'); + Type T(TS, "."); if (T.isDouble() || (T.isPoly() && T.getElementSizeInBits() == 64)) IsA64 = true; @@ -2395,8 +2271,8 @@ void NeonEmitter::run(raw_ostream &OS) { InIfdef = true; } - char M = '2' + (NumMembers - 2); - Type VT(TS, M); + const char Mods[] = { static_cast('2' + (NumMembers - 2)), 0}; + Type VT(TS, Mods); OS << "typedef struct " << VT.str() << " {\n"; OS << " " << T.str() << " val"; OS << "[" << NumMembers << "]"; diff --git a/clang/utils/convert_arm_neon.py b/clang/utils/convert_arm_neon.py new file mode 100644 index 0000000000000..c4b3645294573 --- /dev/null +++ b/clang/utils/convert_arm_neon.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 + +# This script was committed on 20/11/2019 and it would probably make sense to remove +# it after the next release branches. + +# This script is pipe based and converts an arm_neon.td (or arm_fp16.td) file +# using the old single-char type modifiers to an equivalent new-style form where +# each modifier is orthogonal and they can be composed. +# +# It was used to directly generate the .td files on master, so if you have any +# local additions I would suggest implementing any modifiers here, and running +# it over your entire pre-merge .td files rather than trying to resolve any +# conflicts manually. 
+ +import re, sys +MOD_MAP = { + 'v': 'v', + 'x': 'S', + 'u': 'U', + 'd': '.', + 'g': 'q', + 'j': 'Q', + 'w': '>Q', + 'n': '>', + 'h': '<', + 'q': '', + 's': '1', + 'z': '1<', + 'r': '1>', + 'b': '1U', + '$': '1S', + 'k': 'Q', + '2': '2', + '3': '3', + '4': '4', + 'B': '2Q', + 'C': '3Q', + 'D': '4Q', + 'p': '*', + 'c': 'c*', + '7': '< desired: + res += '<' + cur /= 2 + return res + + +def remap_protocol(proto, typespec, name): + key_type = 0 + + # Conversions like to see the integer type so they know signedness. + if 'vcvt' in name and '_f' in name and name != 'vcvt_f32_f64' and name != 'vcvt_f64_f32': + key_type = 1 + default_width = typespec_elt_size(typespec) + inconsistent_width = False + for elt in typespec: + new_width = typespec_elt_size(elt) + if new_width and new_width != default_width: + inconsistent_width = True + + res = '' + for i, c in enumerate(proto): + # void and pointers make for bad discriminators in CGBuiltin.cpp. + if c in 'vcp': + key_type += 1 + + if c in MOD_MAP: + cur_mod = MOD_MAP[c] + elif inconsistent_width: + # Otherwise it's a fixed output width modifier. 
+ sys.stderr.write(f'warning: {name} uses fixed output size but has inconsistent input widths: {proto} {typespec}\n') + + if c == 'Y': + # y: scalar of half float + resize = get_resize(default_width, 16) + cur_mod = f'1F{resize}' + elif c == 'y': + # y: scalar of float + resize = get_resize(default_width, 32) + cur_mod = f'1F{resize}' + elif c == 'o': + # o: scalar of double + resize = get_resize(default_width, 64) + cur_mod = f'1F{resize}' + elif c == 'I': + # I: scalar of 32-bit signed + resize = get_resize(default_width, 32) + cur_mod = f'1S{resize}' + elif c == 'L': + # L: scalar of 64-bit signed + resize = get_resize(default_width, 64) + cur_mod = f'1S{resize}' + elif c == 'U': + # I: scalar of 32-bit unsigned + resize = get_resize(default_width, 32) + cur_mod = f'1U{resize}' + elif c == 'O': + # O: scalar of 64-bit unsigned + resize = get_resize(default_width, 64) + cur_mod = f'1U{resize}' + elif c == 'f': + # f: float (int args) + resize = get_resize(default_width, 32) + cur_mod = f'F{resize}' + elif c == 'F': + # F: double (int args) + resize = get_resize(default_width, 64) + cur_mod = f'F{resize}' + elif c == 'H': + # H: half (int args) + resize = get_resize(default_width, 16) + cur_mod = f'F{resize}' + elif c == '0': + # 0: half (int args), ignore 'Q' size modifier. + resize = get_resize(default_width, 16) + cur_mod = f'Fq{resize}' + elif c == '1': + # 1: half (int args), force 'Q' size modifier. + resize = get_resize(default_width, 16) + cur_mod = f'FQ{resize}' + + if len(cur_mod) == 0: + raise Exception(f'WTF: {c} in {name}') + + if key_type != 0 and key_type == i: + cur_mod += '!' 
+ + if len(cur_mod) == 1: + res += cur_mod + else: + res += '(' + cur_mod + ')' + + return res + +def replace_insts(m): + start, end = m.span('proto') + start -= m.start() + end -= m.start() + new_proto = remap_protocol(m['proto'], m['kinds'], m['name']) + return m.group()[:start] + new_proto + m.group()[end:] + +INST = re.compile(r'Inst<"(?P.*?)",\s*"(?P.*?)",\s*"(?P.*?)"') + +new_td = INST.sub(replace_insts, sys.stdin.read()) +sys.stdout.write(new_td) From cfd9d395674030d549de286d26c0f52020de26e6 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Mon, 25 Nov 2019 14:44:51 +0100 Subject: [PATCH 014/591] [lldb][NFC] NULL -> nullptr in DWARFASTParserClang::UpdateSymbolContextScopeForType --- .../Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 01655f04c4223..3a712fc7e76b6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -1330,20 +1330,20 @@ TypeSP DWARFASTParserClang::UpdateSymbolContextScopeForType( DWARFDIE sc_parent_die = SymbolFileDWARF::GetParentSymbolContextDIE(die); dw_tag_t sc_parent_tag = sc_parent_die.Tag(); - SymbolContextScope *symbol_context_scope = NULL; + SymbolContextScope *symbol_context_scope = nullptr; if (sc_parent_tag == DW_TAG_compile_unit || sc_parent_tag == DW_TAG_partial_unit) { symbol_context_scope = sc.comp_unit; - } else if (sc.function != NULL && sc_parent_die) { + } else if (sc.function != nullptr && sc_parent_die) { symbol_context_scope = sc.function->GetBlock(true).FindBlockByID(sc_parent_die.GetID()); - if (symbol_context_scope == NULL) + if (symbol_context_scope == nullptr) symbol_context_scope = sc.function; } else { symbol_context_scope = sc.module_sp.get(); } - if (symbol_context_scope != NULL) + if (symbol_context_scope != nullptr) 
type_sp->SetSymbolContextScope(symbol_context_scope); // We are ready to put this type into the uniqued list up at the module From bb7b8540f09a300350208ce38335a9b6280d5042 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= Date: Mon, 25 Nov 2019 20:15:25 +0100 Subject: [PATCH 015/591] [InstCombine] Optimize some memccpy calls to memcpy/null Summary: return memccpy(d, "helloworld", 'r', 20) => return memcpy(d, "helloworld", 8 /* pos of 'r' in string */), d + 8 Reviewers: efriedma, jdoerfert Reviewed By: jdoerfert Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D68089 --- .../llvm/Transforms/Utils/SimplifyLibCalls.h | 1 + .../lib/Transforms/Utils/SimplifyLibCalls.cpp | 41 +++++ llvm/test/Transforms/InstCombine/memccpy.ll | 165 ++++++++++++++++-- 3 files changed, 192 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h index 88c2ef787ad81..610668adcfa55 100644 --- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -175,6 +175,7 @@ class LibCallSimplifier { Value *optimizeMemCmp(CallInst *CI, IRBuilder<> &B); Value *optimizeBCmp(CallInst *CI, IRBuilder<> &B); Value *optimizeMemCmpBCmpCommon(CallInst *CI, IRBuilder<> &B); + Value *optimizeMemCCpy(CallInst *CI, IRBuilder<> &B); Value *optimizeMemPCpy(CallInst *CI, IRBuilder<> &B); Value *optimizeMemCpy(CallInst *CI, IRBuilder<> &B); Value *optimizeMemMove(CallInst *CI, IRBuilder<> &B); diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 18a17119b47fd..6d1def357bd3b 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1119,6 +1119,45 @@ Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) { return CI->getArgOperand(0); } +Value 
*LibCallSimplifier::optimizeMemCCpy(CallInst *CI, IRBuilder<> &B) { + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + ConstantInt *StopChar = dyn_cast(CI->getArgOperand(2)); + ConstantInt *N = dyn_cast(CI->getArgOperand(3)); + StringRef SrcStr; + if (CI->use_empty() && Dst == Src) + return Dst; + // memccpy(d, s, c, 0) -> nullptr + if (N) { + if (N->isNullValue()) + return Constant::getNullValue(CI->getType()); + if (!getConstantStringInfo(Src, SrcStr, /*Offset=*/0, + /*TrimAtNul=*/false) || + !StopChar) + return nullptr; + } else { + return nullptr; + } + + // Wrap arg 'c' of type int to char + size_t Pos = SrcStr.find(StopChar->getSExtValue() & 0xFF); + if (Pos == StringRef::npos) { + if (N->getZExtValue() <= SrcStr.size()) { + B.CreateMemCpy(Dst, 1, Src, 1, CI->getArgOperand(3)); + return Constant::getNullValue(CI->getType()); + } + return nullptr; + } + + Value *NewN = + ConstantInt::get(N->getType(), std::min(Pos + 1, N->getZExtValue())); + // memccpy -> llvm.memcpy + B.CreateMemCpy(Dst, 1, Src, 1, NewN); + return Pos + 1 <= N->getZExtValue() + ? 
B.CreateInBoundsGEP(B.getInt8Ty(), Dst, NewN) + : Constant::getNullValue(CI->getType()); +} + Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilder<> &B) { Value *Dst = CI->getArgOperand(0); Value *N = CI->getArgOperand(2); @@ -2864,6 +2903,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, return optimizeMemCmp(CI, Builder); case LibFunc_memcpy: return optimizeMemCpy(CI, Builder); + case LibFunc_memccpy: + return optimizeMemCCpy(CI, Builder); case LibFunc_mempcpy: return optimizeMemPCpy(CI, Builder); case LibFunc_memmove: diff --git a/llvm/test/Transforms/InstCombine/memccpy.ll b/llvm/test/Transforms/InstCombine/memccpy.ll index d911da16278a8..cbb6aa38bd07e 100644 --- a/llvm/test/Transforms/InstCombine/memccpy.ll +++ b/llvm/test/Transforms/InstCombine/memccpy.ll @@ -2,13 +2,18 @@ ; RUN: opt < %s -instcombine -S | FileCheck %s @hello = private constant [11 x i8] c"helloworld\00", align 1 +@NoNulTerminator = private constant [10 x i8] c"helloworld", align 1 +@StopCharAfterNulTerminator = private constant [12 x i8] c"helloworld\00x", align 1 +@StringWithEOF = constant [14 x i8] c"helloworld\FFab\00", align 1 declare i8* @memccpy(i8*, i8*, i32, i64) define i8* @memccpy_to_memcpy(i8* %dst) { ; CHECK-LABEL: @memccpy_to_memcpy( -; CHECK-NEXT: [[CALL:%.*]] = call i8* @memccpy(i8* [[DST:%.*]], i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 12) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST:%.*]] to i64* +; CHECK-NEXT: store i64 8245940763182785896, i64* [[TMP1]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 8 +; CHECK-NEXT: ret i8* [[TMP2]] ; %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 12) ; 114 is 'r' ret i8* %call @@ -16,38 +21,123 @@ define i8* @memccpy_to_memcpy(i8* %dst) { define i8* @memccpy_to_memcpy2(i8* %dst) { ; CHECK-LABEL: @memccpy_to_memcpy2( 
-; CHECK-NEXT: [[CALL:%.*]] = call i8* @memccpy(i8* [[DST:%.*]], i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 5) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST:%.*]] to i64* +; CHECK-NEXT: store i64 8245940763182785896, i64* [[TMP1]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 8 +; CHECK-NEXT: ret i8* [[TMP2]] ; - %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 5) + %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 8); ; 114 is 'r' ret i8* %call } define void @memccpy_to_memcpy3(i8* %dst) { ; CHECK-LABEL: @memccpy_to_memcpy3( -; CHECK-NEXT: [[CALL:%.*]] = call i8* @memccpy(i8* [[DST:%.*]], i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 5) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(5) [[DST:%.*]], i8* nonnull align 1 dereferenceable(5) getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i64 5, i1 false) ; CHECK-NEXT: ret void ; - %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 5) + %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 111, i64 10) ; 111 is 'o' ret void } +define void @memccpy_to_memcpy4(i8* %dst) { +; CHECK-LABEL: @memccpy_to_memcpy4( +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(11) [[DST:%.*]], i8* nonnull align 1 dereferenceable(11) getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i64 11, i1 false) +; CHECK-NEXT: ret void +; + %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 0, i64 12) + ret void +} + +define i8* @memccpy_to_memcpy5(i8* 
%dst) { +; CHECK-LABEL: @memccpy_to_memcpy5( +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(7) [[DST:%.*]], i8* nonnull align 1 dereferenceable(7) getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i64 7, i1 false) +; CHECK-NEXT: ret i8* null +; + %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 7) + ret i8* %call +} + +define i8* @memccpy_to_memcpy6(i8* %dst) { +; CHECK-LABEL: @memccpy_to_memcpy6( +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(6) [[DST:%.*]], i8* nonnull align 1 dereferenceable(6) getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i64 6, i1 false) +; CHECK-NEXT: ret i8* null +; + %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 6); + ret i8* %call +} + +define i8* @memccpy_to_memcpy7(i8* %dst) { +; CHECK-LABEL: @memccpy_to_memcpy7( +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(5) [[DST:%.*]], i8* nonnull align 1 dereferenceable(5) getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i64 5, i1 false) +; CHECK-NEXT: ret i8* null +; + %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 115, i64 5) ; 115 is 's' + ret i8* %call +} + +define i8* @memccpy_to_memcpy8(i8* %dst) { +; CHECK-LABEL: @memccpy_to_memcpy8( +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(11) [[DST:%.*]], i8* nonnull align 1 dereferenceable(11) getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i64 11, i1 false) +; CHECK-NEXT: ret i8* null +; + %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 115, i64 11) ; 115 is 's' + ret i8* %call +} + +define i8* @memccpy_to_memcpy9(i8* %dst, i64 
%n) { +; CHECK-LABEL: @memccpy_to_memcpy9( +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(12) [[DST:%.*]], i8* nonnull align 1 dereferenceable(12) getelementptr inbounds ([12 x i8], [12 x i8]* @StopCharAfterNulTerminator, i64 0, i64 0), i64 12, i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 12 +; CHECK-NEXT: ret i8* [[TMP1]] +; + %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([12 x i8], [12 x i8]* @StopCharAfterNulTerminator, i64 0, i64 0), i32 120, i64 15) ; 120 is 'x' + ret i8* %call +} + +define i8* @memccpy_to_memcpy10(i8* %dst, i64 %n) { +; CHECK-LABEL: @memccpy_to_memcpy10( +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(11) [[DST:%.*]], i8* nonnull align 1 dereferenceable(11) getelementptr inbounds ([14 x i8], [14 x i8]* @StringWithEOF, i64 0, i64 0), i64 11, i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 11 +; CHECK-NEXT: ret i8* [[TMP1]] +; + %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([14 x i8], [14 x i8]* @StringWithEOF, i64 0, i64 0), i32 255, i64 15) + ret i8* %call +} + +define i8* @memccpy_to_memcpy11(i8* %dst, i64 %n) { +; CHECK-LABEL: @memccpy_to_memcpy11( +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(11) [[DST:%.*]], i8* nonnull align 1 dereferenceable(11) getelementptr inbounds ([14 x i8], [14 x i8]* @StringWithEOF, i64 0, i64 0), i64 11, i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 11 +; CHECK-NEXT: ret i8* [[TMP1]] +; + %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([14 x i8], [14 x i8]* @StringWithEOF, i64 0, i64 0), i32 -1, i64 15) + ret i8* %call +} + +define i8* @memccpy_to_memcpy12(i8* %dst, i64 %n) { +; CHECK-LABEL: @memccpy_to_memcpy12( +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(11) [[DST:%.*]], i8* nonnull 
align 1 dereferenceable(11) getelementptr inbounds ([14 x i8], [14 x i8]* @StringWithEOF, i64 0, i64 0), i64 11, i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 11 +; CHECK-NEXT: ret i8* [[TMP1]] +; + %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([14 x i8], [14 x i8]* @StringWithEOF, i64 0, i64 0), i32 1023, i64 15) + ret i8* %call +} + define i8* @memccpy_to_null(i8* %dst, i8* %src, i32 %c) { ; CHECK-LABEL: @memccpy_to_null( -; CHECK-NEXT: [[CALL:%.*]] = call i8* @memccpy(i8* [[DST:%.*]], i8* [[SRC:%.*]], i32 [[C:%.*]], i64 0) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: ret i8* null ; %call = call i8* @memccpy(i8* %dst, i8* %src, i32 %c, i64 0) ret i8* %call } -define i8* @memccpy_to_null2(i8* %dst) { -; CHECK-LABEL: @memccpy_to_null2( -; CHECK-NEXT: [[CALL:%.*]] = call i8* @memccpy(i8* [[DST:%.*]], i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 115, i64 5) -; CHECK-NEXT: ret i8* [[CALL]] +define void @memccpy_dst_src_same_retval_unused(i8* %dst, i32 %c, i64 %n) { +; CHECK-LABEL: @memccpy_dst_src_same_retval_unused( +; CHECK-NEXT: ret void ; - %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 115, i64 5) ; 115 is 's' - ret i8* %call + %call = call i8* @memccpy(i8* %dst, i8* %dst, i32 %c, i64 %n) + ret void } ; Negative tests @@ -77,3 +167,48 @@ define i8* @unknown_size_n(i8* %dst, i64 %n) { %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 %n) ret i8* %call } + +define i8* @no_nul_terminator(i8* %dst, i64 %n) { +; CHECK-LABEL: @no_nul_terminator( +; CHECK-NEXT: [[CALL:%.*]] = call i8* @memccpy(i8* [[DST:%.*]], i8* getelementptr inbounds ([12 x i8], [12 x i8]* @StopCharAfterNulTerminator, i64 0, i64 0), i32 120, i64 [[N:%.*]]) +; CHECK-NEXT: ret i8* [[CALL]] +; + %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([12 x i8], [12 x i8]* 
@StopCharAfterNulTerminator, i64 0, i64 0), i32 120, i64 %n) ; 120 is 'x' + ret i8* %call +} + +define i8* @possibly_valid_data_after_array(i8* %dst, i64 %n) { +; CHECK-LABEL: @possibly_valid_data_after_array( +; CHECK-NEXT: [[CALL:%.*]] = call i8* @memccpy(i8* [[DST:%.*]], i8* getelementptr inbounds ([10 x i8], [10 x i8]* @NoNulTerminator, i64 0, i64 0), i32 115, i64 [[N:%.*]]) +; CHECK-NEXT: ret i8* [[CALL]] +; + %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @NoNulTerminator, i64 0, i64 0), i32 115, i64 %n) ; 115 is 's' + ret i8* %call +} + +define i8* @possibly_valid_data_after_array2(i8* %dst, i64 %n) { +; CHECK-LABEL: @possibly_valid_data_after_array2( +; CHECK-NEXT: [[CALL:%.*]] = call i8* @memccpy(i8* [[DST:%.*]], i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 115, i64 [[N:%.*]]) +; CHECK-NEXT: ret i8* [[CALL]] +; + %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 115, i64 %n) ; 115 is 's' + ret i8* %call +} + +define i8* @possibly_valid_data_after_array3(i8* %dst) { +; CHECK-LABEL: @possibly_valid_data_after_array3( +; CHECK-NEXT: [[CALL:%.*]] = call i8* @memccpy(i8* [[DST:%.*]], i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 115, i64 12) +; CHECK-NEXT: ret i8* [[CALL]] +; + %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 115, i64 12) ; 115 is 's' + ret i8* %call +} + +define i8* @memccpy_dst_src_same_retval_used(i8* %dst, i32 %c, i64 %n) { +; CHECK-LABEL: @memccpy_dst_src_same_retval_used( +; CHECK-NEXT: [[CALL:%.*]] = call i8* @memccpy(i8* [[DST:%.*]], i8* [[DST]], i32 [[C:%.*]], i64 [[N:%.*]]) +; CHECK-NEXT: ret i8* [[CALL]] +; + %call = call i8* @memccpy(i8* %dst, i8* %dst, i32 %c, i64 %n) + ret i8* %call +} From 4a59eedd2d1d942a30f6ba016168c6430f9c165c Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Thu, 21 Nov 2019 14:29:15 
+0000 Subject: [PATCH 016/591] [ARM][ConstantIslands] Correct block size update When inserting a non-decrementing LE, the basic block was being resized to take into consideration that a tCMP and tBcc had been combined into one T1 instruction. This is not true in the LE case where we generate a CBN?Z and an LE. Differential Revision: https://reviews.llvm.org/D70536 --- llvm/lib/Target/ARM/ARMConstantIslandPass.cpp | 20 +- .../LowOverheadLoops/out-of-range-cbz.mir | 451 ++++++++++++++++++ 2 files changed, 461 insertions(+), 10 deletions(-) create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/out-of-range-cbz.mir diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index 24ca25f73e96d..634fb89b8e893 100644 --- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -1917,6 +1917,7 @@ bool ARMConstantIslands::optimizeThumb2Branches() { MachineInstrBuilder MIB = BuildMI(*MBB, Br.MI, Br.MI->getDebugLoc(), TII->get(ARM::t2LE)); + // Swapped a t2Bcc for a t2LE, so no need to update the size of the block. MIB.add(Br.MI->getOperand(0)); Br.MI->eraseFromParent(); Br.MI = MIB; @@ -1975,21 +1976,20 @@ bool ARMConstantIslands::optimizeThumb2Branches() { .addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags()); Cmp.MI->eraseFromParent(); - BBInfoVector &BBInfo = BBUtils->getBBInfo(); - BBInfo[MBB->getNumber()].Size -= 2; if (Br.MI->getOpcode() == ARM::tBcc) { Br.MI->eraseFromParent(); Br.MI = NewBR; - } else if (&MBB->back() != Br.MI) { - // We've generated an LE and already erased the original conditional - // branch. The CBN?Z is now used to branch to the other successor, so an - // unconditional branch terminator is now redundant. + BBUtils->adjustBBSize(MBB, -2); + } else if (MBB->back().getOpcode() != ARM::t2LE) { + // An LE has been generated, but it's not the terminator - that is an + // unconditional branch. 
However, the logic has now been reversed with the + // CBN?Z being the conditional branch and the LE being the unconditional + // branch. So this means we can remove the redundant unconditional branch + // at the end of the block. MachineInstr *LastMI = &MBB->back(); - if (LastMI != Br.MI) { - BBInfo[MBB->getNumber()].Size -= LastMI->getDesc().getSize(); - LastMI->eraseFromParent(); - } + BBUtils->adjustBBSize(MBB, -LastMI->getDesc().getSize()); + LastMI->eraseFromParent(); } BBUtils->adjustBBOffsetsAfter(MBB); ++NumCBZ; diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/out-of-range-cbz.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/out-of-range-cbz.mir new file mode 100644 index 0000000000000..c5a38ea13454f --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/out-of-range-cbz.mir @@ -0,0 +1,451 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-cp-islands %s -o - | FileCheck %s +--- | + @d = hidden local_unnamed_addr global i32 0, align 4 + @a = hidden global i32 0, align 4 + @e = hidden local_unnamed_addr global i32 0, align 4 + + define hidden void @f(i64 %g) { + entry: + %conv = trunc i64 %g to i32 + %tobool5 = icmp eq i64 %g, 0 + br i1 %tobool5, label %j.us.us.preheader, label %entry.split + + j.us.us.preheader: ; preds = %entry + %.pre59 = load i32, i32* @d, align 4 + br label %j.us.us + + j.us.us: ; preds = %j.us.us, %if.end.us.us.us, %if.end.us.us.us.1, %if.end.us.us.us.2, %if.end.us.us.us.3, %if.end.us.us.us.4, %if.end.us.us.us.5, %if.end.us.us.us.6, %j.us.us.preheader + %0 = phi i32 [ %.pre59, %j.us.us.preheader ], [ %12, %if.end.us.us.us.6 ], [ %11, %if.end.us.us.us.5 ], [ %10, %if.end.us.us.us.4 ], [ %9, %if.end.us.us.us.3 ], [ %8, %if.end.us.us.us.2 ], [ %7, %if.end.us.us.us.1 ], [ %2, %if.end.us.us.us ], [ %0, %j.us.us ] + %cmp.us.us = icmp slt i32 %0, ptrtoint (i32* @a to i32) + %conv1.us.us = zext i1 %cmp.us.us to i32 + %1 = load i32, i32* @e, align 4 + 
%and.us.us = and i32 %1, %conv1.us.us + store i32 %and.us.us, i32* @e, align 4 + %tobool4.us.us.us = icmp eq i32 %0, 0 + br i1 %tobool4.us.us.us, label %if.end.us.us.us, label %j.us.us + + if.end.us.us.us: ; preds = %j.us.us + tail call void asm sideeffect "", ""() + %2 = load i32, i32* @d, align 4 + %tobool4.us.us.us.1 = icmp eq i32 %2, 0 + br i1 %tobool4.us.us.us.1, label %if.end.us.us.us.1, label %j.us.us + + entry.split: ; preds = %entry + %tobool = icmp eq i32 %conv, 0 + br i1 %tobool, label %j.us27.preheader, label %j.preheader + + j.preheader: ; preds = %entry.split + %.pre = load i32, i32* @e, align 4 + %.pre55 = load i32, i32* @d, align 4 + %cmp = icmp slt i32 %conv, ptrtoint (i32* @a to i32) + %conv1 = zext i1 %cmp to i32 + br label %j + + j.us27.preheader: ; preds = %entry.split + %.pre56 = load i32, i32* @d, align 4 + %.pre57 = load i32, i32* @e, align 4 + %cmp.us29 = icmp slt i32 %.pre56, ptrtoint (i32* @a to i32) + %conv1.us30 = zext i1 %cmp.us29 to i32 + br label %j.us27 + + j.us27: ; preds = %j.us27, %j.us27.preheader + %3 = phi i32 [ %.pre57, %j.us27.preheader ], [ %and.us31, %j.us27 ] + %4 = icmp eq i32 %.pre56, 0 + %and.us31 = and i32 %3, %conv1.us30 + br i1 %4, label %if.end.us38, label %j.us27 + + if.end.us38: ; preds = %j.us27 + store i32 %and.us31, i32* @e, align 4 + tail call void asm sideeffect "", ""() + ret void + + j: ; preds = %j, %j.preheader + %5 = phi i32 [ %.pre, %j.preheader ], [ %and, %j ] + %6 = icmp eq i32 %.pre55, 0 + %and = and i32 %5, %conv1 + br i1 %6, label %if.end, label %j + + if.end: ; preds = %j + store i32 %and, i32* @e, align 4 + tail call void asm sideeffect "", ""() + ret void + + if.end.us.us.us.1: ; preds = %if.end.us.us.us + tail call void asm sideeffect "", ""() + %7 = load i32, i32* @d, align 4 + %tobool4.us.us.us.2 = icmp eq i32 %7, 0 + br i1 %tobool4.us.us.us.2, label %if.end.us.us.us.2, label %j.us.us + + if.end.us.us.us.2: ; preds = %if.end.us.us.us.1 + tail call void asm sideeffect "", ""() + %8 = load 
i32, i32* @d, align 4 + %tobool4.us.us.us.3 = icmp eq i32 %8, 0 + br i1 %tobool4.us.us.us.3, label %if.end.us.us.us.3, label %j.us.us + + if.end.us.us.us.3: ; preds = %if.end.us.us.us.2 + tail call void asm sideeffect "", ""() + %9 = load i32, i32* @d, align 4 + %tobool4.us.us.us.4 = icmp eq i32 %9, 0 + br i1 %tobool4.us.us.us.4, label %if.end.us.us.us.4, label %j.us.us + + if.end.us.us.us.4: ; preds = %if.end.us.us.us.3 + tail call void asm sideeffect "", ""() + %10 = load i32, i32* @d, align 4 + %tobool4.us.us.us.5 = icmp eq i32 %10, 0 + br i1 %tobool4.us.us.us.5, label %if.end.us.us.us.5, label %j.us.us + + if.end.us.us.us.5: ; preds = %if.end.us.us.us.4 + tail call void asm sideeffect "", ""() + %11 = load i32, i32* @d, align 4 + %tobool4.us.us.us.6 = icmp eq i32 %11, 0 + br i1 %tobool4.us.us.us.6, label %if.end.us.us.us.6, label %j.us.us + + if.end.us.us.us.6: ; preds = %if.end.us.us.us.5 + tail call void asm sideeffect "", ""() + %12 = load i32, i32* @d, align 4 + %tobool4.us.us.us.7 = icmp eq i32 %12, 0 + br i1 %tobool4.us.us.us.7, label %if.end.us.us.us.7, label %j.us.us + + if.end.us.us.us.7: ; preds = %if.end.us.us.us.6 + tail call void asm sideeffect "", ""() + ret void + } + +... 
+--- +name: f +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: f + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.5(0x30000000), %bb.1(0x50000000) + ; CHECK: liveins: $r0, $r1, $r7, $lr + ; CHECK: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: dead renamable $r1, $cpsr = tORR killed renamable $r1, renamable $r0, 14, $noreg + ; CHECK: tBcc %bb.5, 0, killed $cpsr + ; CHECK: bb.1.entry.split: + ; CHECK: successors: %bb.15(0x30000000), %bb.2(0x50000000) + ; CHECK: liveins: $r0 + ; CHECK: tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr + ; CHECK: tBcc 
%bb.15, 0, killed $cpsr + ; CHECK: bb.2.j.preheader: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: liveins: $r0 + ; CHECK: $r1 = t2MOVi16 target-flags(arm-lo16) @a, 14, $noreg + ; CHECK: $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @a, 14, $noreg + ; CHECK: tCMPr killed renamable $r0, killed renamable $r1, 14, $noreg, implicit-def $cpsr + ; CHECK: $r1 = t2MOVi16 target-flags(arm-lo16) @d, 14, $noreg + ; CHECK: renamable $r0 = t2CSINC $zr, $zr, 10, implicit killed $cpsr + ; CHECK: $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @d, 14, $noreg + ; CHECK: renamable $r2 = tLDRi killed renamable $r1, 0, 14, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: $r1 = t2MOVi16 target-flags(arm-lo16) @e, 14, $noreg + ; CHECK: $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @e, 14, $noreg + ; CHECK: renamable $r3 = tLDRi renamable $r1, 0, 14, $noreg :: (dereferenceable load 4 from @e) + ; CHECK: bb.3.j (align 4): + ; CHECK: successors: %bb.4(0x04000000), %bb.3(0x7c000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3 + ; CHECK: renamable $r3, dead $cpsr = tAND killed renamable $r3, renamable $r0, 14, $noreg + ; CHECK: tCBZ $r2, %bb.4 + ; CHECK: bb.4.if.end: + ; CHECK: liveins: $r1, $r3 + ; CHECK: tSTRi killed renamable $r3, killed renamable $r1, 0, 14, $noreg :: (store 4 into @e) + ; CHECK: INLINEASM &"", 1 + ; CHECK: tPOP_RET 14, $noreg, def $r7, def $pc + ; CHECK: bb.5.j.us.us.preheader: + ; CHECK: successors: %bb.6(0x80000000) + ; CHECK: $r12 = t2MOVi16 target-flags(arm-lo16) @d, 14, $noreg + ; CHECK: $lr = t2MOVi16 target-flags(arm-lo16) @a, 14, $noreg + ; CHECK: $r12 = t2MOVTi16 killed $r12, target-flags(arm-hi16) @d, 14, $noreg + ; CHECK: $r2 = t2MOVi16 target-flags(arm-lo16) @e, 14, $noreg + ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: $lr = t2MOVTi16 killed $lr, target-flags(arm-hi16) @a, 14, $noreg + ; CHECK: $r2 = t2MOVTi16 killed $r2, target-flags(arm-hi16) @e, 14, $noreg + ; CHECK: 
bb.6.j.us.us (align 4): + ; CHECK: successors: %bb.7(0x40000000), %bb.6(0x40000000) + ; CHECK: liveins: $lr, $r2, $r3, $r12 + ; CHECK: tCMPhir renamable $r3, renamable $lr, 14, $noreg, implicit-def $cpsr + ; CHECK: renamable $r1 = tLDRi renamable $r2, 0, 14, $noreg :: (dereferenceable load 4 from @e) + ; CHECK: renamable $r0 = t2CSINC $zr, $zr, 10, implicit killed $cpsr + ; CHECK: renamable $r0 = t2ANDrr killed renamable $r0, killed renamable $r1, 14, $noreg, $noreg + ; CHECK: tSTRi killed renamable $r0, renamable $r2, 0, 14, $noreg :: (store 4 into @e) + ; CHECK: tCBZ $r3, %bb.7 + ; CHECK: bb.7.if.end.us.us.us: + ; CHECK: successors: %bb.8(0x40000000), %bb.6(0x40000000) + ; CHECK: liveins: $lr, $r2, $r12 + ; CHECK: INLINEASM &"", 1 + ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: tCBZ $r3, %bb.8 + ; CHECK: bb.8.if.end.us.us.us.1: + ; CHECK: successors: %bb.9(0x40000000), %bb.6(0x40000000) + ; CHECK: liveins: $lr, $r2, $r12 + ; CHECK: INLINEASM &"", 1 + ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: tCBZ $r3, %bb.9 + ; CHECK: bb.9.if.end.us.us.us.2: + ; CHECK: successors: %bb.10(0x40000000), %bb.6(0x40000000) + ; CHECK: liveins: $lr, $r2, $r12 + ; CHECK: INLINEASM &"", 1 + ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: tCBZ $r3, %bb.10 + ; CHECK: bb.10.if.end.us.us.us.3: + ; CHECK: successors: %bb.11(0x40000000), %bb.6(0x40000000) + ; CHECK: liveins: $lr, $r2, $r12 + ; CHECK: INLINEASM &"", 1 + ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: tCBZ $r3, %bb.11 + ; CHECK: bb.11.if.end.us.us.us.4: + ; CHECK: successors: %bb.12(0x40000000), %bb.6(0x40000000) + ; CHECK: liveins: $lr, $r2, $r12 + ; CHECK: INLINEASM &"", 1 + ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) 
+ ; CHECK: tCBZ $r3, %bb.12 + ; CHECK: bb.12.if.end.us.us.us.5: + ; CHECK: successors: %bb.13(0x40000000), %bb.6(0x40000000) + ; CHECK: liveins: $lr, $r2, $r12 + ; CHECK: INLINEASM &"", 1 + ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: tCBZ $r3, %bb.13 + ; CHECK: bb.13.if.end.us.us.us.6: + ; CHECK: successors: %bb.14(0x04000000), %bb.6(0x7c000000) + ; CHECK: liveins: $lr, $r2, $r12 + ; CHECK: INLINEASM &"", 1 + ; CHECK: renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: tCBZ $r3, %bb.14 + ; CHECK: bb.14.if.end.us.us.us.7: + ; CHECK: INLINEASM &"", 1 + ; CHECK: tPOP_RET 14, $noreg, def $r7, def $pc + ; CHECK: bb.15.j.us27.preheader: + ; CHECK: successors: %bb.16(0x80000000) + ; CHECK: $r0 = t2MOVi16 target-flags(arm-lo16) @d, 14, $noreg + ; CHECK: $r1 = t2MOVi16 target-flags(arm-lo16) @a, 14, $noreg + ; CHECK: $r0 = t2MOVTi16 killed $r0, target-flags(arm-hi16) @d, 14, $noreg + ; CHECK: $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @a, 14, $noreg + ; CHECK: renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (dereferenceable load 4 from @d) + ; CHECK: tCMPr renamable $r0, killed renamable $r1, 14, $noreg, implicit-def $cpsr + ; CHECK: $r1 = t2MOVi16 target-flags(arm-lo16) @e, 14, $noreg + ; CHECK: $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @e, 14, $noreg + ; CHECK: renamable $r2 = t2CSINC $zr, $zr, 10, implicit killed $cpsr + ; CHECK: renamable $r3 = tLDRi renamable $r1, 0, 14, $noreg :: (dereferenceable load 4 from @e) + ; CHECK: bb.16.j.us27 (align 4): + ; CHECK: successors: %bb.17(0x04000000), %bb.16(0x7c000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3 + ; CHECK: renamable $r3, dead $cpsr = tAND killed renamable $r3, renamable $r2, 14, $noreg + ; CHECK: tCBZ $r0, %bb.17 + ; CHECK: bb.17.if.end.us38: + ; CHECK: liveins: $r1, $r3 + ; CHECK: tSTRi killed renamable $r3, killed renamable $r1, 0, 14, $noreg :: (store 4 into @e) + ; 
CHECK: INLINEASM &"", 1 + ; CHECK: tPOP_RET 14, $noreg, def $r7, def $pc + bb.0.entry: + successors: %bb.1(0x30000000), %bb.11(0x50000000) + liveins: $r0, $r1, $r7, $lr + + frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + dead renamable $r1, $cpsr = tORR killed renamable $r1, renamable $r0, 14, $noreg + t2Bcc %bb.1, 0, killed $cpsr + + bb.11.entry.split: + successors: %bb.15(0x30000000), %bb.12(0x50000000) + liveins: $r0 + + tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.15, 0, killed $cpsr + + bb.12.j.preheader: + successors: %bb.13(0x80000000) + liveins: $r0 + + $r1 = t2MOVi16 target-flags(arm-lo16) @a, 14, $noreg + $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @a, 14, $noreg + tCMPr killed renamable $r0, killed renamable $r1, 14, $noreg, implicit-def $cpsr + $r1 = t2MOVi16 target-flags(arm-lo16) @d, 14, $noreg + renamable $r0 = t2CSINC $zr, $zr, 10, implicit killed $cpsr + $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @d, 14, $noreg + renamable $r2 = tLDRi killed renamable $r1, 0, 14, $noreg :: (dereferenceable load 4 from @d) + $r1 = t2MOVi16 target-flags(arm-lo16) @e, 14, $noreg + $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @e, 14, $noreg + renamable $r3 = tLDRi renamable $r1, 0, 14, $noreg :: (dereferenceable load 4 from @e) + + bb.13.j (align 4): + successors: %bb.14(0x04000000), %bb.13(0x7c000000) + liveins: $r0, $r1, $r2, $r3 + + renamable $r3, dead $cpsr = tAND killed renamable $r3, renamable $r0, 14, $noreg + tCMPi8 renamable $r2, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.13, 1, killed $cpsr + + bb.14.if.end: + liveins: $r1, $r3 + + tSTRi killed renamable $r3, killed renamable $r1, 0, 14, $noreg :: (store 4 into @e) + INLINEASM &"", 1 + tPOP_RET 14, $noreg, def $r7, def $pc + + bb.1.j.us.us.preheader: + successors: %bb.2(0x80000000) + + $r12 
= t2MOVi16 target-flags(arm-lo16) @d, 14, $noreg + $lr = t2MOVi16 target-flags(arm-lo16) @a, 14, $noreg + $r12 = t2MOVTi16 killed $r12, target-flags(arm-hi16) @d, 14, $noreg + $r2 = t2MOVi16 target-flags(arm-lo16) @e, 14, $noreg + renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + $lr = t2MOVTi16 killed $lr, target-flags(arm-hi16) @a, 14, $noreg + $r2 = t2MOVTi16 killed $r2, target-flags(arm-hi16) @e, 14, $noreg + + bb.2.j.us.us (align 4): + successors: %bb.3(0x40000000), %bb.2(0x40000000) + liveins: $lr, $r2, $r3, $r12 + + tCMPhir renamable $r3, renamable $lr, 14, $noreg, implicit-def $cpsr + renamable $r1 = tLDRi renamable $r2, 0, 14, $noreg :: (dereferenceable load 4 from @e) + renamable $r0 = t2CSINC $zr, $zr, 10, implicit killed $cpsr + tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr + renamable $r0 = t2ANDrr killed renamable $r0, killed renamable $r1, 14, $noreg, $noreg + tSTRi killed renamable $r0, renamable $r2, 0, 14, $noreg :: (store 4 into @e) + t2Bcc %bb.2, 1, killed $cpsr + + bb.3.if.end.us.us.us: + successors: %bb.4(0x40000000), %bb.2(0x40000000) + liveins: $lr, $r2, $r12 + + INLINEASM &"", 1 + renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 1, killed $cpsr + + bb.4.if.end.us.us.us.1: + successors: %bb.5(0x40000000), %bb.2(0x40000000) + liveins: $lr, $r2, $r12 + + INLINEASM &"", 1 + renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 1, killed $cpsr + + bb.5.if.end.us.us.us.2: + successors: %bb.6(0x40000000), %bb.2(0x40000000) + liveins: $lr, $r2, $r12 + + INLINEASM &"", 1 + renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 1, killed $cpsr + + bb.6.if.end.us.us.us.3: 
+ successors: %bb.7(0x40000000), %bb.2(0x40000000) + liveins: $lr, $r2, $r12 + + INLINEASM &"", 1 + renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 1, killed $cpsr + + bb.7.if.end.us.us.us.4: + successors: %bb.8(0x40000000), %bb.2(0x40000000) + liveins: $lr, $r2, $r12 + + INLINEASM &"", 1 + renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 1, killed $cpsr + + bb.8.if.end.us.us.us.5: + successors: %bb.9(0x40000000), %bb.2(0x40000000) + liveins: $lr, $r2, $r12 + + INLINEASM &"", 1 + renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 1, killed $cpsr + + bb.9.if.end.us.us.us.6: + successors: %bb.10(0x04000000), %bb.2(0x7c000000) + liveins: $lr, $r2, $r12 + + INLINEASM &"", 1 + renamable $r3 = t2LDRi12 renamable $r12, 0, 14, $noreg :: (dereferenceable load 4 from @d) + tCMPi8 renamable $r3, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.2, 1, killed $cpsr + + bb.10.if.end.us.us.us.7: + INLINEASM &"", 1 + tPOP_RET 14, $noreg, def $r7, def $pc + + bb.15.j.us27.preheader: + successors: %bb.16(0x80000000) + + $r0 = t2MOVi16 target-flags(arm-lo16) @d, 14, $noreg + $r1 = t2MOVi16 target-flags(arm-lo16) @a, 14, $noreg + $r0 = t2MOVTi16 killed $r0, target-flags(arm-hi16) @d, 14, $noreg + $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @a, 14, $noreg + renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg :: (dereferenceable load 4 from @d) + tCMPr renamable $r0, killed renamable $r1, 14, $noreg, implicit-def $cpsr + $r1 = t2MOVi16 target-flags(arm-lo16) @e, 14, $noreg + $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @e, 14, $noreg + renamable $r2 = t2CSINC $zr, $zr, 10, implicit killed $cpsr + renamable $r3 = tLDRi renamable $r1, 
0, 14, $noreg :: (dereferenceable load 4 from @e) + + bb.16.j.us27 (align 4): + successors: %bb.17(0x04000000), %bb.16(0x7c000000) + liveins: $r0, $r1, $r2, $r3 + + renamable $r3, dead $cpsr = tAND killed renamable $r3, renamable $r2, 14, $noreg + tCMPi8 renamable $r0, 0, 14, $noreg, implicit-def $cpsr + t2Bcc %bb.16, 1, killed $cpsr + + bb.17.if.end.us38: + liveins: $r1, $r3 + + tSTRi killed renamable $r3, killed renamable $r1, 0, 14, $noreg :: (store 4 into @e) + INLINEASM &"", 1 + tPOP_RET 14, $noreg, def $r7, def $pc + +... From 0e32fbd2231fe5792f53e3a1cfd9fe8e015c55e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= Date: Tue, 26 Nov 2019 11:06:06 +0100 Subject: [PATCH 017/591] [InstCombine] Fixed std::min on some bots. NFCI --- llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 6d1def357bd3b..9e57d660b04bb 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1150,7 +1150,7 @@ Value *LibCallSimplifier::optimizeMemCCpy(CallInst *CI, IRBuilder<> &B) { } Value *NewN = - ConstantInt::get(N->getType(), std::min(Pos + 1, N->getZExtValue())); + ConstantInt::get(N->getType(), std::min(uint64_t(Pos + 1), N->getZExtValue())); // memccpy -> llvm.memcpy B.CreateMemCpy(Dst, 1, Src, 1, NewN); return Pos + 1 <= N->getZExtValue() From 7047a3a729c6b2779b512269ff3eba88d8976d63 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Tue, 26 Nov 2019 10:45:35 +0100 Subject: [PATCH 018/591] [lldb][NFC] Extract pointer to member type parsing from DWARFASTParserClang::ParseTypeFromDWARF Part of the work to split up this monolithic parsing function. 
--- .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 42 +++++++++++-------- .../SymbolFile/DWARF/DWARFASTParserClang.h | 4 ++ 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 3a712fc7e76b6..6d02f1b5ee833 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -1286,24 +1286,7 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, } break; case DW_TAG_ptr_to_member_type: { - Type *pointee_type = dwarf->ResolveTypeUID(attrs.type.Reference(), true); - Type *class_type = - dwarf->ResolveTypeUID(attrs.containing_type.Reference(), true); - - CompilerType pointee_clang_type = pointee_type->GetForwardCompilerType(); - CompilerType class_clang_type = class_type->GetLayoutCompilerType(); - - clang_type = ClangASTContext::CreateMemberPointerType(class_clang_type, - pointee_clang_type); - - if (llvm::Optional clang_type_size = - clang_type.GetByteSize(nullptr)) { - type_sp = std::make_shared( - die.GetID(), dwarf, attrs.name, *clang_type_size, nullptr, - LLDB_INVALID_UID, Type::eEncodingIsUID, nullptr, clang_type, - Type::ResolveState::Forward); - } - + type_sp = ParsePointerToMemberType(die, attrs); break; } default: @@ -1320,6 +1303,29 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, return UpdateSymbolContextScopeForType(sc, die, type_sp); } +TypeSP DWARFASTParserClang::ParsePointerToMemberType( + const DWARFDIE &die, const ParsedDWARFTypeAttributes &attrs) { + SymbolFileDWARF *dwarf = die.GetDWARF(); + Type *pointee_type = dwarf->ResolveTypeUID(attrs.type.Reference(), true); + Type *class_type = + dwarf->ResolveTypeUID(attrs.containing_type.Reference(), true); + + CompilerType pointee_clang_type = pointee_type->GetForwardCompilerType(); + CompilerType class_clang_type = 
class_type->GetLayoutCompilerType(); + + CompilerType clang_type = ClangASTContext::CreateMemberPointerType( + class_clang_type, pointee_clang_type); + + if (llvm::Optional clang_type_size = + clang_type.GetByteSize(nullptr)) { + return std::make_shared(die.GetID(), dwarf, attrs.name, + *clang_type_size, nullptr, LLDB_INVALID_UID, + Type::eEncodingIsUID, nullptr, clang_type, + Type::ResolveState::Forward); + } + return nullptr; +} + TypeSP DWARFASTParserClang::UpdateSymbolContextScopeForType( const SymbolContext &sc, const DWARFDIE &die, TypeSP type_sp) { if (!type_sp) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index 982a089981d4e..b92c397394544 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -168,6 +168,10 @@ class DWARFASTParserClang : public DWARFASTParser { // Return true if this type is a declaration to a type in an external // module. lldb::ModuleSP GetModuleForType(const DWARFDIE &die); + +private: + lldb::TypeSP ParsePointerToMemberType(const DWARFDIE &die, + const ParsedDWARFTypeAttributes &attrs); }; /// Parsed form of all attributes that are relevant for type reconstruction. From cced971fd3d6713ec4989990e1b2f42c8539f0f3 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Tue, 26 Nov 2019 10:03:25 +0000 Subject: [PATCH 019/591] [ARM][ReachingDefs] RDA in LoLoops Add several new methods to ReachingDefAnalysis: - getReachingMIDef, instead of returning an integer, return the MachineInstr that produces the def. - getInstFromId, return a MachineInstr for which the given integer corresponds to. - hasSameReachingDef, return whether two MachineInstr use the same def of a register. - isRegUsedAfter, return whether a register is used after a given MachineInstr. These methods have been used in ARMLowOverhead to replace searching for uses/defs. 
Differential Revision: https://reviews.llvm.org/D70009 --- .../llvm/CodeGen/ReachingDefAnalysis.h | 18 ++- llvm/lib/CodeGen/ReachingDefAnalysis.cpp | 52 ++++++ llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 132 +++++---------- llvm/test/CodeGen/ARM/O3-pipeline.ll | 1 + .../LowOverheadLoops/unsafe-cpsr-loop-def.mir | 153 ++++++++++++++++++ .../LowOverheadLoops/unsafe-cpsr-loop-use.mir | 152 +++++++++++++++++ .../CodeGen/Thumb2/ifcvt-neon-deprecated.mir | 113 ++++++++----- 7 files changed, 488 insertions(+), 133 deletions(-) create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-def.mir create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-use.mir diff --git a/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h b/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h index 9ab9e8068eabf..dda82b7717e77 100644 --- a/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h +++ b/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h @@ -87,13 +87,29 @@ class ReachingDefAnalysis : public MachineFunctionPass { MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::NoVRegs); + MachineFunctionProperties::Property::NoVRegs).set( + MachineFunctionProperties::Property::TracksLiveness); } /// Provides the instruction id of the closest reaching def instruction of /// PhysReg that reaches MI, relative to the begining of MI's basic block. int getReachingDef(MachineInstr *MI, int PhysReg); + /// Provides the instruction of the closest reaching def instruction of + /// PhysReg that reaches MI, relative to the begining of MI's basic block. + MachineInstr *getReachingMIDef(MachineInstr *MI, int PhysReg); + + /// Provides the MI, from the given block, corresponding to the Id or a + /// nullptr if the id does not refer to the block. + MachineInstr *getInstFromId(MachineBasicBlock *MBB, int InstId); + + /// Return whether A and B use the same def of PhysReg. 
+ bool hasSameReachingDef(MachineInstr *A, MachineInstr *B, int PhysReg); + + /// Return whether the given register is used after MI, whether it's a local + /// use or a live out. + bool isRegUsedAfter(MachineInstr *MI, int PhysReg); + /// Provides the clearance - the number of instructions since the closest /// reaching def instuction of PhysReg that reaches MI. int getClearance(MachineInstr *MI, MCPhysReg PhysReg); diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp index 2850033e64196..55d9cb65999ca 100644 --- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/ReachingDefAnalysis.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -189,7 +190,58 @@ int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) { return LatestDef; } +MachineInstr* ReachingDefAnalysis::getReachingMIDef(MachineInstr *MI, int PhysReg) { + return getInstFromId(MI->getParent(), getReachingDef(MI, PhysReg)); +} + +MachineInstr *ReachingDefAnalysis::getInstFromId(MachineBasicBlock *MBB, + int InstId) { + assert(MBB->getNumber() < MBBReachingDefs.size() && + "Unexpected basic block number."); + assert(InstId < static_cast(MBB->size()) && + "Unexpected instruction id."); + + if (InstId < 0) + return nullptr; + + for (auto &MI : *MBB) { + if (InstIds.count(&MI) && InstIds[&MI] == InstId) + return &MI; + } + return nullptr; +} + int ReachingDefAnalysis::getClearance(MachineInstr *MI, MCPhysReg PhysReg) { assert(InstIds.count(MI) && "Unexpected machine instuction."); return InstIds[MI] - getReachingDef(MI, PhysReg); } + +bool ReachingDefAnalysis::hasSameReachingDef(MachineInstr *A, MachineInstr *B, + int PhysReg) { + MachineBasicBlock *ParentA = A->getParent(); + MachineBasicBlock *ParentB = 
B->getParent(); + if (ParentA != ParentB) + return false; + + return getReachingDef(A, PhysReg) == getReachingDef(B, PhysReg); +} + +bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, int PhysReg) { + MachineBasicBlock *MBB = MI->getParent(); + LivePhysRegs LiveRegs(*TRI); + LiveRegs.addLiveOuts(*MBB); + + // Yes if the register is live out of the basic block. + if (LiveRegs.contains(PhysReg)) + return true; + + // Walk backwards through the block to see if the register is live at some + // point. + for (auto Last = MBB->rbegin(), End = MBB->rend(); Last != End; ++Last) { + LiveRegs.stepBackward(*Last); + if (LiveRegs.contains(PhysReg)) + return InstIds[&*Last] > InstIds[MI]; + } + return false; +} + diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index 733a3f166069f..7487a43b7aa3f 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -25,6 +25,8 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/ReachingDefAnalysis.h" #include "llvm/MC/MCInstrDesc.h" using namespace llvm; @@ -104,10 +106,11 @@ namespace { // Is it safe to define LR with DLS/WLS? // LR can be defined if it is the operand to start, because it's the same // value, or if it's going to be equivalent to the operand to Start. - MachineInstr *IsSafeToDefineLR(); + MachineInstr *IsSafeToDefineLR(ReachingDefAnalysis *RDA); - // Check the branch targets are within range and we satisfy our restructi - void CheckLegality(ARMBasicBlockUtils *BBUtils); + // Check the branch targets are within range and we satisfy our + // restrictions. 
+ void CheckLegality(ARMBasicBlockUtils *BBUtils, ReachingDefAnalysis *RDA); bool FoundAllComponents() const { return Start && Dec && End; @@ -127,6 +130,7 @@ namespace { class ARMLowOverheadLoops : public MachineFunctionPass { MachineFunction *MF = nullptr; + ReachingDefAnalysis *RDA = nullptr; const ARMBaseInstrInfo *TII = nullptr; MachineRegisterInfo *MRI = nullptr; std::unique_ptr BBUtils = nullptr; @@ -139,6 +143,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired(); + AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -146,7 +151,8 @@ namespace { MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( - MachineFunctionProperties::Property::NoVRegs); + MachineFunctionProperties::Property::NoVRegs).set( + MachineFunctionProperties::Property::TracksLiveness); } StringRef getPassName() const override { @@ -183,31 +189,6 @@ static bool IsLoopStart(MachineInstr &MI) { MI.getOpcode() == ARM::t2WhileLoopStart; } -template -static MachineInstr* SearchForDef(MachineInstr *Begin, T End, unsigned Reg) { - for(auto &MI : make_range(T(Begin), End)) { - for (auto &MO : MI.operands()) { - if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg) - continue; - return &MI; - } - } - return nullptr; -} - -static MachineInstr* SearchForUse(MachineInstr *Begin, - MachineBasicBlock::iterator End, - unsigned Reg) { - for(auto &MI : make_range(MachineBasicBlock::iterator(Begin), End)) { - for (auto &MO : MI.operands()) { - if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) - continue; - return &MI; - } - } - return nullptr; -} - static bool IsVCTP(MachineInstr *MI) { switch (MI->getOpcode()) { default: @@ -221,73 +202,41 @@ static bool IsVCTP(MachineInstr *MI) { return false; } -MachineInstr *LowOverheadLoop::IsSafeToDefineLR() { +MachineInstr *LowOverheadLoop::IsSafeToDefineLR(ReachingDefAnalysis *RDA) { + // We can define LR because LR already contains the 
same value. + if (Start->getOperand(0).getReg() == ARM::LR) + return Start; - auto IsMoveLR = [](MachineInstr *MI, unsigned Reg) { + unsigned CountReg = Start->getOperand(0).getReg(); + auto IsMoveLR = [&CountReg](MachineInstr *MI) { return MI->getOpcode() == ARM::tMOVr && MI->getOperand(0).getReg() == ARM::LR && - MI->getOperand(1).getReg() == Reg && + MI->getOperand(1).getReg() == CountReg && MI->getOperand(2).getImm() == ARMCC::AL; }; MachineBasicBlock *MBB = Start->getParent(); - unsigned CountReg = Start->getOperand(0).getReg(); - // Walk forward and backward in the block to find the closest instructions - // that define LR. Then also filter them out if they're not a mov lr. - MachineInstr *PredLRDef = SearchForDef(Start, MBB->rend(), ARM::LR); - if (PredLRDef && !IsMoveLR(PredLRDef, CountReg)) - PredLRDef = nullptr; - - MachineInstr *SuccLRDef = SearchForDef(Start, MBB->end(), ARM::LR); - if (SuccLRDef && !IsMoveLR(SuccLRDef, CountReg)) - SuccLRDef = nullptr; - - // We've either found one, two or none mov lr instructions... Now figure out - // if they are performing the equilvant mov that the Start instruction will. - // Do this by scanning forward and backward to see if there's a def of the - // register holding the count value. If we find a suitable def, return it as - // the insert point. Later, if InsertPt != Start, then we can remove the - // redundant instruction. - if (SuccLRDef) { - MachineBasicBlock::iterator End(SuccLRDef); - if (!SearchForDef(Start, End, CountReg)) { - return SuccLRDef; - } else - SuccLRDef = nullptr; - } - if (PredLRDef) { - MachineBasicBlock::reverse_iterator End(PredLRDef); - if (!SearchForDef(Start, End, CountReg)) { - return PredLRDef; - } else - PredLRDef = nullptr; - } - // We can define LR because LR already contains the same value. - if (Start->getOperand(0).getReg() == ARM::LR) - return Start; + // Find an insertion point: + // - Is there a (mov lr, Count) before Start? 
If so, and nothing else writes + // to Count before Start, we can insert at that mov. + // - Is there a (mov lr, Count) after Start? If so, and nothing else writes + // to Count after Start, we can insert at that mov. + if (auto *LRDef = RDA->getReachingMIDef(&MBB->back(), ARM::LR)) { + if (IsMoveLR(LRDef) && RDA->hasSameReachingDef(Start, LRDef, CountReg)) + return LRDef; + } // We've found no suitable LR def and Start doesn't use LR directly. Can we - // just define LR anyway? - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - LivePhysRegs LiveRegs(*TRI); - LiveRegs.addLiveOuts(*MBB); - - // Not if we've haven't found a suitable mov and LR is live out. - if (LiveRegs.contains(ARM::LR)) - return nullptr; - - // If LR is not live out, we can insert the instruction if nothing else - // uses LR after it. - if (!SearchForUse(Start, MBB->end(), ARM::LR)) + // just define LR anyway? + if (!RDA->isRegUsedAfter(Start, ARM::LR)) return Start; - LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find suitable insertion point for" - << " LR\n"); return nullptr; } -void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils) { +void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils, + ReachingDefAnalysis *RDA) { if (Revert) return; @@ -320,7 +269,7 @@ void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils) { return; } - InsertPt = Revert ? nullptr : IsSafeToDefineLR(); + InsertPt = Revert ? 
nullptr : IsSafeToDefineLR(RDA); if (!InsertPt) { LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n"); Revert = true; @@ -343,6 +292,7 @@ bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) { LLVM_DEBUG(dbgs() << "ARM Loops on " << MF->getName() << " ------------- \n"); auto &MLI = getAnalysis(); + RDA = &getAnalysis(); MF->getProperties().set(MachineFunctionProperties::Property::TracksLiveness); MRI = &MF->getRegInfo(); TII = static_cast(ST.getInstrInfo()); @@ -462,7 +412,7 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { if (!LoLoop.FoundAllComponents()) return false; - LoLoop.CheckLegality(BBUtils.get()); + LoLoop.CheckLegality(BBUtils.get(), RDA); Expand(LoLoop); return true; } @@ -493,19 +443,15 @@ void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const { } bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI, - bool AllowFlags) const { + bool SetFlags) const { LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI); MachineBasicBlock *MBB = MI->getParent(); - // If nothing uses or defines CPSR between LoopDec and LoopEnd, use a t2SUBS. - bool SetFlags = false; - if (AllowFlags) { - if (auto *Def = SearchForDef(MI, MBB->end(), ARM::CPSR)) { - if (!SearchForUse(MI, MBB->end(), ARM::CPSR) && - Def->getOpcode() == ARM::t2LoopEnd) - SetFlags = true; - } - } + // If nothing defines CPSR between LoopDec and LoopEnd, use a t2SUBS. 
+ if (SetFlags && + (RDA->isRegUsedAfter(MI, ARM::CPSR) || + !RDA->hasSameReachingDef(MI, &MBB->back(), ARM::CPSR))) + SetFlags = false; MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri)); diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll index dd741388d7499..f45302fbc1b3b 100644 --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -154,6 +154,7 @@ ; CHECK-NEXT: ARM constant island placement and branch shortening pass ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Machine Natural Loop Construction +; CHECK-NEXT: ReachingDefAnalysis ; CHECK-NEXT: ARM Low Overhead Loops pass ; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: StackMap Liveness Analysis diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-def.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-def.mir new file mode 100644 index 0000000000000..2ccb8da48d841 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-def.mir @@ -0,0 +1,153 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops -verify-machineinstrs %s -o - | FileCheck %s +# Check that subs isn't used during the revert because there's a def after LoopDec. 
+ +--- | + define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) { + entry: + %scevgep = getelementptr i32, i32* %q, i32 -1 + %scevgep3 = getelementptr i32, i32* %p, i32 -1 + call void @llvm.set.loop.iterations.i32(i32 %n) + %limit = lshr i32 %n, 1 + br label %while.body + + while.body: ; preds = %while.body, %entry + %lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %entry ] + %lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %entry ] + %tmp = phi i32 [ %n, %entry ], [ %tmp2, %while.body ] + %scevgep7 = getelementptr i32, i32* %lsr.iv, i32 1 + %scevgep4 = getelementptr i32, i32* %lsr.iv4, i32 1 + %tmp1 = load i32, i32* %scevgep7, align 4 + %tmp2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %tmp, i32 1) + %half = lshr i32 %tmp1, 1 + %cmp = icmp ult i32 %tmp, %limit + %res = select i1 %cmp, i32 %tmp1, i32 %half + store i32 %res, i32* %scevgep4, align 4 + %scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1 + %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1 + %tmp3 = icmp ne i32 %tmp2, 0 + br i1 %tmp3, label %while.body, label %while.end + + while.end: ; preds = %while.body + ret i32 0 + } + + ; Function Attrs: noduplicate nounwind + declare void @llvm.set.loop.iterations.i32(i32) #0 + + ; Function Attrs: noduplicate nounwind + declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0 + + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #1 + + attributes #0 = { noduplicate nounwind } + attributes #1 = { nounwind } + +... 
+--- +name: do_copy +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: do_copy + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r7, $lr + ; CHECK: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: $lr = tMOVr killed $r0, 14, $noreg + ; CHECK: renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg + ; CHECK: renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg + ; CHECK: renamable $r2 = t2LSRri renamable $lr, 1, 14, $noreg, $noreg + ; CHECK: 
bb.1.while.body: + ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2 + ; CHECK: renamable $r3, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep7) + ; CHECK: tCMPhir renamable $lr, renamable $r2, 14, $noreg, implicit-def $cpsr + ; CHECK: $lr = t2SUBri killed renamable $lr, 1, 14, $noreg, $noreg + ; CHECK: t2IT 2, 8, implicit-def $itstate + ; CHECK: renamable $r3 = tLSRri $noreg, killed renamable $r3, 1, 2, killed $cpsr, implicit renamable $r3, implicit killed $itstate + ; CHECK: early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep4) + ; CHECK: t2CMPri renamable $lr, 0, 14, $noreg, implicit-def $cpsr + ; CHECK: tBcc %bb.1, 4, killed $cpsr + ; CHECK: tB %bb.2, 14, $noreg + ; CHECK: bb.2.while.end: + ; CHECK: $r0, dead $cpsr = tMOVi8 0, 14, $noreg + ; CHECK: tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + + frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + $lr = tMOVr killed $r0, 14, $noreg + renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg + renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg + renamable $r2 = t2LSRri renamable $lr, 1, 14, $noreg, $noreg + t2DoLoopStart renamable $lr + + bb.1.while.body: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $lr, $r0, $r1, $r2 + + renamable $r3, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep7) + tCMPhir renamable $lr, renamable $r2, 14, $noreg, implicit-def $cpsr + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2IT 2, 8, implicit-def $itstate + renamable $r3 = tLSRri $noreg, killed renamable $r3, 1, 
2, killed $cpsr, implicit renamable $r3, implicit killed $itstate + early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep4) + t2CMPri renamable $lr, 0, 14, $noreg, implicit-def $cpsr + tBcc %bb.1, 4, killed $cpsr + tB %bb.2, 14, $noreg + + bb.2.while.end: + $r0, dead $cpsr = tMOVi8 0, 14, $noreg + tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 + +... diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-use.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-use.mir new file mode 100644 index 0000000000000..c052e22d217d6 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unsafe-cpsr-loop-use.mir @@ -0,0 +1,152 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops -verify-machineinstrs %s -o - | FileCheck %s +# Check that subs isn't used during the revert because there's a cpsr use after it. 
+ +--- | + define i32 @do_copy(i32 %n, i32* nocapture %p, i32* nocapture readonly %q) { + entry: + %scevgep = getelementptr i32, i32* %q, i32 -1 + %scevgep3 = getelementptr i32, i32* %p, i32 -1 + call void @llvm.set.loop.iterations.i32(i32 %n) + %limit = lshr i32 %n, 1 + br label %while.body + + while.body: ; preds = %while.body, %entry + %lsr.iv4 = phi i32* [ %scevgep5, %while.body ], [ %scevgep3, %entry ] + %lsr.iv = phi i32* [ %scevgep1, %while.body ], [ %scevgep, %entry ] + %tmp = phi i32 [ %n, %entry ], [ %tmp2, %while.body ] + %scevgep7 = getelementptr i32, i32* %lsr.iv, i32 1 + %scevgep4 = getelementptr i32, i32* %lsr.iv4, i32 1 + %tmp1 = load i32, i32* %scevgep7, align 4 + %tmp2 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %tmp, i32 1) + %half = lshr i32 %tmp1, 1 + %cmp = icmp ult i32 %tmp, %limit + %res = select i1 %cmp, i32 %tmp1, i32 %half + store i32 %res, i32* %scevgep4, align 4 + %scevgep1 = getelementptr i32, i32* %lsr.iv, i32 1 + %scevgep5 = getelementptr i32, i32* %lsr.iv4, i32 1 + %tmp3 = icmp ne i32 %tmp2, 0 + br i1 %tmp3, label %while.body, label %while.end + + while.end: ; preds = %while.body + ret i32 0 + } + + ; Function Attrs: noduplicate nounwind + declare void @llvm.set.loop.iterations.i32(i32) #0 + + ; Function Attrs: noduplicate nounwind + declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0 + + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #1 + + attributes #0 = { noduplicate nounwind } + attributes #1 = { nounwind } + +... 
+--- +name: do_copy +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: do_copy + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2, $r7, $lr + ; CHECK: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg + ; CHECK: renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg + ; CHECK: renamable $r2 = t2LSRri renamable $r0, 1, 14, $noreg, $noreg + ; CHECK: $lr = tMOVr killed $r0, 14, $noreg + ; CHECK: 
bb.1.while.body: + ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2 + ; CHECK: renamable $r3, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep7) + ; CHECK: tCMPhir renamable $lr, renamable $r2, 14, $noreg, implicit-def $cpsr + ; CHECK: $lr = t2SUBri killed renamable $lr, 1, 14, $noreg, $noreg + ; CHECK: t2IT 2, 8, implicit-def $itstate + ; CHECK: renamable $r3 = tLSRri $noreg, killed renamable $r3, 1, 2, killed $cpsr, implicit renamable $r3, implicit killed $itstate + ; CHECK: early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep4) + ; CHECK: t2CMPri $lr, 0, 14, $noreg, implicit-def $cpsr + ; CHECK: tBcc %bb.1, 1, $cpsr + ; CHECK: tB %bb.2, 14, $noreg + ; CHECK: bb.2.while.end: + ; CHECK: $r0, dead $cpsr = tMOVi8 0, 14, $noreg + ; CHECK: tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $r7, $lr + + frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 4, 14, $noreg + renamable $r1, dead $cpsr = tSUBi3 killed renamable $r2, 4, 14, $noreg + t2DoLoopStart renamable $r0 + renamable $r2 = t2LSRri renamable $r0, 1, 14, $noreg, $noreg + $lr = tMOVr killed $r0, 14, $noreg + + bb.1.while.body: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $lr, $r0, $r1, $r2 + + renamable $r3, renamable $r1 = t2LDR_PRE killed renamable $r1, 4, 14, $noreg :: (load 4 from %ir.scevgep7) + tCMPhir renamable $lr, renamable $r2, 14, $noreg, implicit-def $cpsr + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2IT 2, 8, implicit-def $itstate + renamable $r3 = tLSRri $noreg, killed renamable $r3, 1, 2, killed $cpsr, 
implicit renamable $r3, implicit killed $itstate + early-clobber renamable $r0 = t2STR_PRE killed renamable $r3, killed renamable $r0, 4, 14, $noreg :: (store 4 into %ir.scevgep4) + t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr + tB %bb.2, 14, $noreg + + bb.2.while.end: + $r0, dead $cpsr = tMOVi8 0, 14, $noreg + tPOP_RET 14, $noreg, def $r7, def $pc, implicit killed $r0 + +... diff --git a/llvm/test/CodeGen/Thumb2/ifcvt-neon-deprecated.mir b/llvm/test/CodeGen/Thumb2/ifcvt-neon-deprecated.mir index 58ddfcc2a683e..1f5edb0c78b91 100644 --- a/llvm/test/CodeGen/Thumb2/ifcvt-neon-deprecated.mir +++ b/llvm/test/CodeGen/Thumb2/ifcvt-neon-deprecated.mir @@ -1,54 +1,89 @@ -# RUN: llc -mtriple=thumbv7 -start-before=if-converter -o - %s | FileCheck %s +# RUN: llc -mtriple=thumbv7 -start-before=if-converter %s -o - | FileCheck %s + +--- | + ; ModuleID = 'vdup-test.ll' + source_filename = "vdup-test.ll" + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv7" + + define arm_aapcs_vfpcc <2 x i32> @NeonVdupMul(i32 %scalar, i32 %N, <2 x i32> %vector) { + entry: + %cmp = icmp ne i32 %N, 0 + %broadcast = insertelement <2 x i32> undef, i32 %scalar, i32 0 + %dup = shufflevector <2 x i32> %broadcast, <2 x i32> undef, <2 x i32> zeroinitializer + %mul = mul <2 x i32> %dup, %vector + br i1 %cmp, label %select.end, label %select.false + + select.false: ; preds = %entry + br label %select.end + + select.end: ; preds = %entry, %select.false + %res = phi <2 x i32> [ %mul, %entry ], [ %vector, %select.false ] + ret <2 x i32> %res + } + +... 
--- name: NeonVdupMul +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$d0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +constants: [] +machineFunctionInfo: {} body: | - bb.0: - successors: %bb.2, %bb.1 + bb.0.entry: + successors: %bb.1(0x50000000), %bb.2(0x30000000) liveins: $d0, $r0, $r1 - - t2CMPri killed $r1, 0, 14, $noreg, implicit-def $cpsr + + t2CMPri killed renamable $r1, 0, 14, $noreg, implicit-def $cpsr t2Bcc %bb.2, 0, killed $cpsr - + bb.1: + successors: %bb.2(0x80000000) liveins: $d0, $r0 - - $d16 = VDUP32d killed $r0, 14, $noreg + + renamable $d16 = VDUP32d killed renamable $r0, 14, $noreg ; Verify that the neon instructions haven't been conditionalized: ; CHECK-LABEL: NeonVdupMul ; CHECK: vdup.32 ; CHECK: vmul.i32 - $d0 = VMULv2i32 killed $d16, killed $d0, 14, $noreg - - bb.2: + renamable $d0 = VMULv2i32 killed renamable $d16, killed renamable $d0, 14, $noreg + + bb.2.select.end: liveins: $d0 - - tBX_RET 14, $noreg, implicit $d0 - -... 
---- -name: NeonVmovVfpLdr -body: | - bb.0.entry: - successors: %bb.1, %bb.2 - liveins: $r0, $r1 - - t2CMPri killed $r1, 0, 14, $noreg, implicit-def $cpsr - t2Bcc %bb.2, 1, killed $cpsr - - bb.1: - $d0 = VMOVv2i32 0, 14, $noreg - tBX_RET 14, $noreg, implicit $d0 - - bb.2: - liveins: $r0 - - $d0 = VLDRD killed $r0, 0, 14, $noreg - ; Verify that the neon instruction VMOVv2i32 hasn't been conditionalized, - ; but the VLDR instruction that is available both in the VFP and Advanced - ; SIMD extensions has. - ; CHECK-LABEL: NeonVmovVfpLdr - ; CHECK-DAG: vmov.i32 d0, #0x0 - ; CHECK-DAG: vldr{{ne|eq}} d0, [r0] + tBX_RET 14, $noreg, implicit $d0 ... From 3ec193fb527e697faac4ef8f30934dd7bce849a7 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Tue, 26 Nov 2019 11:16:48 +0100 Subject: [PATCH 020/591] [SystemZ] Don't build a PPA instruction with an immediate 0 operand. The improvement in the machine verifier for operand types (D63973) discovered a bad operand in a test using a PPA instruction. It was an immediate 0 where a register was expected. This patch fixes this (NFC) by now making the PPA second register operand NoRegister instead of a zero immediate in the MIR. 
Review: Ulrich Weigand https://reviews.llvm.org/D70501 --- .../Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp | 8 ++++++-- llvm/lib/Target/SystemZ/SystemZInstrInfo.td | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp index 91cb35dd72f26..c5cce39747a9e 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp @@ -41,8 +41,12 @@ void SystemZInstPrinter::printAddress(unsigned Base, int64_t Disp, void SystemZInstPrinter::printOperand(const MCOperand &MO, const MCAsmInfo *MAI, raw_ostream &O) { - if (MO.isReg()) - O << '%' << getRegisterName(MO.getReg()); + if (MO.isReg()) { + if (!MO.getReg()) + O << '0'; + else + O << '%' << getRegisterName(MO.getReg()); + } else if (MO.isImm()) O << MO.getImm(); else if (MO.isExpr()) diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index 8b334756611a4..041971ca7cb8f 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -2069,7 +2069,7 @@ let Predicates = [FeatureProcessorAssist] in { def PPA : SideEffectTernaryRRFc<"ppa", 0xB2E8, GR64, GR64, imm32zx4>; def : Pat<(int_s390_ppa_txassist GR32:$src), (PPA (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32), - 0, 1)>; + zero_reg, 1)>; } //===----------------------------------------------------------------------===// From 28166816b05aebb3154e5f8a28b3ef447cce8471 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Tue, 26 Nov 2019 10:25:04 +0000 Subject: [PATCH 021/591] [ARM][ReachingDefs] Remove dead code in loloops. Add some more helper functions to ReachingDefs to query the uses of a given MachineInstr and also to query whether two MachineInstrs use the same def of a register. 
For Arm, while tail-predicating, these helpers are used in the low-overhead loops to remove the dead code that calculates the number of loop iterations. Differential Revision: https://reviews.llvm.org/D70240 --- .../llvm/CodeGen/ReachingDefAnalysis.h | 9 + llvm/lib/CodeGen/ReachingDefAnalysis.cpp | 43 +++- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 189 +++++++++++++----- .../Thumb2/LowOverheadLoops/fast-fp-loops.ll | 6 +- .../LowOverheadLoops/mve-tail-data-types.ll | 78 ++------ .../LowOverheadLoops/vector-arith-codegen.ll | 49 +---- .../CodeGen/Thumb2/LowOverheadLoops/wlstp.mir | 21 +- 7 files changed, 214 insertions(+), 181 deletions(-) diff --git a/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h b/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h index dda82b7717e77..ac001e326c570 100644 --- a/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h +++ b/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h @@ -114,6 +114,15 @@ class ReachingDefAnalysis : public MachineFunctionPass { /// reaching def instuction of PhysReg that reaches MI. int getClearance(MachineInstr *MI, MCPhysReg PhysReg); + /// Provides the uses, in the same block as MI, of register that MI defines. + /// This does not consider live-outs. + void getReachingLocalUses(MachineInstr *MI, int PhysReg, + SmallVectorImpl &Uses); + + /// Provide the number of uses, in the same block as MI, of the register that + /// MI defines. + unsigned getNumUses(MachineInstr *MI, int PhysReg); + private: /// Set up LiveRegs by merging predecessor live-out values. 
void enterBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp index 55d9cb65999ca..ad7f910be4c52 100644 --- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -194,9 +194,19 @@ MachineInstr* ReachingDefAnalysis::getReachingMIDef(MachineInstr *MI, int PhysRe return getInstFromId(MI->getParent(), getReachingDef(MI, PhysReg)); } +bool ReachingDefAnalysis::hasSameReachingDef(MachineInstr *A, MachineInstr *B, + int PhysReg) { + MachineBasicBlock *ParentA = A->getParent(); + MachineBasicBlock *ParentB = B->getParent(); + if (ParentA != ParentB) + return false; + + return getReachingDef(A, PhysReg) == getReachingDef(B, PhysReg); +} + MachineInstr *ReachingDefAnalysis::getInstFromId(MachineBasicBlock *MBB, int InstId) { - assert(MBB->getNumber() < MBBReachingDefs.size() && + assert(static_cast(MBB->getNumber()) < MBBReachingDefs.size() && "Unexpected basic block number."); assert(InstId < static_cast(MBB->size()) && "Unexpected instruction id."); @@ -216,14 +226,31 @@ int ReachingDefAnalysis::getClearance(MachineInstr *MI, MCPhysReg PhysReg) { return InstIds[MI] - getReachingDef(MI, PhysReg); } -bool ReachingDefAnalysis::hasSameReachingDef(MachineInstr *A, MachineInstr *B, - int PhysReg) { - MachineBasicBlock *ParentA = A->getParent(); - MachineBasicBlock *ParentB = B->getParent(); - if (ParentA != ParentB) - return false; +void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def, int PhysReg, + SmallVectorImpl &Uses) { + MachineBasicBlock *MBB = Def->getParent(); + MachineBasicBlock::iterator MI = MachineBasicBlock::iterator(Def); + while (++MI != MBB->end()) { + for (auto &MO : MI->operands()) { + if (!MO.isReg() || !MO.isUse() || MO.getReg() != PhysReg) + continue; + + // If/when we find a new reaching def, we know that there's no more uses + // of 'Def'. 
+ if (getReachingMIDef(&*MI, PhysReg) != Def) + return; + + Uses.push_back(&*MI); + if (MO.isKill()) + return; + } + } +} - return getReachingDef(A, PhysReg) == getReachingDef(B, PhysReg); +unsigned ReachingDefAnalysis::getNumUses(MachineInstr *Def, int PhysReg) { + SmallVector Uses; + getReachingLocalUses(Def, PhysReg, Uses); + return Uses.size(); } bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, int PhysReg) { diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index 7487a43b7aa3f..756d0fdb55702 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -110,12 +110,41 @@ namespace { // Check the branch targets are within range and we satisfy our // restrictions. - void CheckLegality(ARMBasicBlockUtils *BBUtils, ReachingDefAnalysis *RDA); + void CheckLegality(ARMBasicBlockUtils *BBUtils, ReachingDefAnalysis *RDA, + MachineLoopInfo *MLI); bool FoundAllComponents() const { return Start && Dec && End; } + // Return the loop iteration count, or the number of elements if we're tail + // predicating. + MachineOperand &getCount() { + return IsTailPredicationLegal() ? + VCTP->getOperand(1) : Start->getOperand(0); + } + + unsigned getStartOpcode() const { + bool IsDo = Start->getOpcode() == ARM::t2DoLoopStart; + if (!IsTailPredicationLegal()) + return IsDo ? ARM::t2DLS : ARM::t2WLS; + + switch (VCTP->getOpcode()) { + default: + llvm_unreachable("unhandled vctp opcode"); + break; + case ARM::MVE_VCTP8: + return IsDo ? ARM::MVE_DLSTP_8 : ARM::MVE_WLSTP_8; + case ARM::MVE_VCTP16: + return IsDo ? ARM::MVE_DLSTP_16 : ARM::MVE_WLSTP_16; + case ARM::MVE_VCTP32: + return IsDo ? ARM::MVE_DLSTP_32 : ARM::MVE_WLSTP_32; + case ARM::MVE_VCTP64: + return IsDo ? 
ARM::MVE_DLSTP_64 : ARM::MVE_WLSTP_64; + } + return 0; + } + void dump() const { if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start; if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec; @@ -130,6 +159,7 @@ namespace { class ARMLowOverheadLoops : public MachineFunctionPass { MachineFunction *MF = nullptr; + MachineLoopInfo *MLI = nullptr; ReachingDefAnalysis *RDA = nullptr; const ARMBaseInstrInfo *TII = nullptr; MachineRegisterInfo *MRI = nullptr; @@ -236,7 +266,8 @@ MachineInstr *LowOverheadLoop::IsSafeToDefineLR(ReachingDefAnalysis *RDA) { } void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils, - ReachingDefAnalysis *RDA) { + ReachingDefAnalysis *RDA, + MachineLoopInfo *MLI) { if (Revert) return; @@ -273,14 +304,70 @@ void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils, if (!InsertPt) { LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n"); Revert = true; + return; } else LLVM_DEBUG(dbgs() << "ARM Loops: Start insertion point: " << *InsertPt); - LLVM_DEBUG(if (IsTailPredicationLegal()) { - dbgs() << "ARM Loops: Will use tail predication to convert:\n"; + // For tail predication, we need to provide the number of elements, instead + // of the iteration count, to the loop start instruction. The number of + // elements is provided to the vctp instruction, so we need to check that + // we can use this register at InsertPt. + if (!IsTailPredicationLegal()) + return; + + Register NumElements = VCTP->getOperand(1).getReg(); + + // If the register is defined within loop, then we can't perform TP. + // TODO: Check whether this is just a mov of a register that would be + // available. + if (RDA->getReachingDef(VCTP, NumElements) >= 0) { + CannotTailPredicate = true; + return; + } + + // We can't perform TP if the register does not hold the same value at + // InsertPt as the liveout value. 
+ MachineBasicBlock *InsertBB = InsertPt->getParent(); + if (!RDA->hasSameReachingDef(InsertPt, &InsertBB->back(), + NumElements)) { + CannotTailPredicate = true; + return; + } + + // Especially in the case of while loops, InsertBB may not be the + // preheader, so we need to check that the register isn't redefined + // before entering the loop. + auto CannotProvideElements = [&RDA](MachineBasicBlock *MBB, + Register NumElements) { + // NumElements is redefined in this block. + if (RDA->getReachingDef(&MBB->back(), NumElements) >= 0) + return true; + + // Don't continue searching up through multiple predecessors. + if (MBB->pred_size() > 1) + return true; + + return false; + }; + + // First, find the block that looks like the preheader. + MachineBasicBlock *MBB = MLI->findLoopPreheader(ML, true); + if (!MBB) { + CannotTailPredicate = true; + return; + } + + // Then search backwards for a def, until we get to InsertBB. + while (MBB != InsertBB) { + CannotTailPredicate = CannotProvideElements(MBB, NumElements); + if (CannotTailPredicate) + return; + MBB = *MBB->pred_begin(); + } + + LLVM_DEBUG(dbgs() << "ARM Loops: Will use tail predication to convert:\n"; for (auto *MI : VPTUsers) - dbgs() << " - " << *MI; - }); + dbgs() << " - " << *MI;); } bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) { @@ -291,7 +378,7 @@ bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) { MF = &mf; LLVM_DEBUG(dbgs() << "ARM Loops on " << MF->getName() << " ------------- \n"); - auto &MLI = getAnalysis(); + MLI = &getAnalysis(); RDA = &getAnalysis(); MF->getProperties().set(MachineFunctionProperties::Property::TracksLiveness); MRI = &MF->getRegInfo(); @@ -301,7 +388,7 @@ bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) { BBUtils->adjustBBOffsetsAfter(&MF->front()); bool Changed = false; - for (auto ML : MLI) { + for (auto ML : *MLI) { if (!ML->getParentLoop()) Changed |= ProcessLoop(ML); } @@ -317,7 +404,14 @@ bool 
ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { for (auto I = ML->begin(), E = ML->end(); I != E; ++I) Changed |= ProcessLoop(*I); - LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML); + LLVM_DEBUG(dbgs() << "ARM Loops: Processing loop containing:\n"; + if (auto *Preheader = ML->getLoopPreheader()) + dbgs() << " - " << Preheader->getName() << "\n"; + else if (auto *Preheader = MLI->findLoopPreheader(ML)) + dbgs() << " - " << Preheader->getName() << "\n"; + for (auto *MBB : ML->getBlocks()) + dbgs() << " - " << MBB->getName() << "\n"; + ); // Search the given block for a loop start instruction. If one isn't found, // and there's only one predecessor block, search that one too. @@ -333,28 +427,15 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { }; LowOverheadLoop LoLoop(ML); - // Search the preheader for the start intrinsic, or look through the - // predecessors of the header to find exactly one set.iterations intrinsic. + // Search the preheader for the start intrinsic. // FIXME: I don't see why we shouldn't be supporting multiple predecessors // with potentially multiple set.loop.iterations, so we need to enable this. if (auto *Preheader = ML->getLoopPreheader()) LoLoop.Start = SearchForStart(Preheader); - else { - LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find loop preheader!\n" - << " - Performing manual predecessor search.\n"); - MachineBasicBlock *Pred = nullptr; - for (auto *MBB : ML->getHeader()->predecessors()) { - if (!ML->contains(MBB)) { - if (Pred) { - LLVM_DEBUG(dbgs() << " - Found multiple out-of-loop preds.\n"); - LoLoop.Start = nullptr; - break; - } - Pred = MBB; - LoLoop.Start = SearchForStart(MBB); - } - } - } + else if (auto *Preheader = MLI->findLoopPreheader(ML, true)) + LoLoop.Start = SearchForStart(Preheader); + else + return false; // Find the low-overhead loop components and decide whether or not to fall // back to a normal loop. 
Also look for a vctp instructions and decide @@ -412,7 +493,7 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { if (!LoLoop.FoundAllComponents()) return false; - LoLoop.CheckLegality(BBUtils.get(), RDA); + LoLoop.CheckLegality(BBUtils.get(), RDA, MLI); Expand(LoLoop); return true; } @@ -504,35 +585,45 @@ MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) { MachineInstr *Start = LoLoop.Start; MachineBasicBlock *MBB = InsertPt->getParent(); bool IsDo = Start->getOpcode() == ARM::t2DoLoopStart; - unsigned Opc = 0; - - if (!LoLoop.IsTailPredicationLegal()) - Opc = IsDo ? ARM::t2DLS : ARM::t2WLS; - else { - switch (LoLoop.VCTP->getOpcode()) { - case ARM::MVE_VCTP8: - Opc = IsDo ? ARM::MVE_DLSTP_8 : ARM::MVE_WLSTP_8; - break; - case ARM::MVE_VCTP16: - Opc = IsDo ? ARM::MVE_DLSTP_16 : ARM::MVE_WLSTP_16; - break; - case ARM::MVE_VCTP32: - Opc = IsDo ? ARM::MVE_DLSTP_32 : ARM::MVE_WLSTP_32; - break; - case ARM::MVE_VCTP64: - Opc = IsDo ? ARM::MVE_DLSTP_64 : ARM::MVE_WLSTP_64; - break; - } - } + unsigned Opc = LoLoop.getStartOpcode(); + MachineOperand &Count = LoLoop.getCount(); MachineInstrBuilder MIB = BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(Opc)); MIB.addDef(ARM::LR); - MIB.add(Start->getOperand(0)); + MIB.add(Count); if (!IsDo) MIB.add(Start->getOperand(1)); + // When using tail-predication, try to delete the dead code that was used to + // calculate the number of loop iterations. 
+ if (LoLoop.IsTailPredicationLegal()) { + SmallVector Killed; + SmallVector Dead; + if (auto *Def = RDA->getReachingMIDef(Start, + Start->getOperand(0).getReg())) { + Killed.push_back(Def); + + while (!Killed.empty()) { + MachineInstr *Def = Killed.back(); + Killed.pop_back(); + Dead.push_back(Def); + for (auto &MO : Def->operands()) { + if (!MO.isReg() || !MO.isKill()) + continue; + + MachineInstr *Kill = RDA->getReachingMIDef(Def, MO.getReg()); + if (Kill && RDA->getNumUses(Kill, MO.getReg()) == 1) + Killed.push_back(Kill); + } + } + for (auto *MI : Dead) + MI->eraseFromParent(); + } + } + + // If we're inserting at a mov lr, then remove it as it's redundant. if (InsertPt != Start) InsertPt->eraseFromParent(); Start->eraseFromParent(); diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll index 02d05ef9c0f61..f285b445cf3cf 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll @@ -36,11 +36,7 @@ define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocaptur ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: b .LBB0_8 ; CHECK-NEXT: .LBB0_4: @ %vector.ph -; CHECK-NEXT: adds r6, r3, #3 -; CHECK-NEXT: bic r6, r6, #3 -; CHECK-NEXT: subs r6, #4 -; CHECK-NEXT: add.w lr, r12, r6, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB0_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r1] diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll index 38e688bbf6288..21be95e1fcc8a 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll @@ -16,17 +16,19 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_char(i8 zeroext %a, i8* nocapture re ; CHECK-NEXT: 
movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: mov r12, r2 ; CHECK-NEXT: adds r2, r1, r3 -; CHECK-NEXT: vldrb.u32 q2, [r2] +; CHECK-NEXT: vctp.32 r12 +; CHECK-NEXT: vpst +; CHECK-NEXT: vldrbt.u32 q2, [r2] ; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: sub.w r2, r12, #4 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vmla.u32 q0, q2, r0 -; CHECK-NEXT: letp lr, .LBB0_1 +; CHECK-NEXT: le lr, .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %middle.block ; CHECK-NEXT: vctp.32 r12 ; CHECK-NEXT: vpsel q0, q0, q1 @@ -82,13 +84,8 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_short(i16 signext %a, i16* nocapture ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: bxeq lr ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: adds r3, r2, #3 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: bic r3, r3, #3 -; CHECK-NEXT: sub.w r12, r3, #4 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w lr, r3, r12, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB1_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.s32 q2, [r1] @@ -160,17 +157,19 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_uchar(i8 zeroext %a, i8* nocapture r ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB2_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: mov r12, r2 ; CHECK-NEXT: adds r2, r1, r3 -; CHECK-NEXT: vldrb.u32 q2, [r2] +; CHECK-NEXT: vctp.32 r12 +; CHECK-NEXT: vpst +; CHECK-NEXT: vldrbt.u32 q2, [r2] ; CHECK-NEXT: adds r3, #4 ; CHECK-NEXT: sub.w r2, r12, #4 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vmla.u32 q0, q2, r0 -; CHECK-NEXT: letp lr, .LBB2_1 +; CHECK-NEXT: le lr, .LBB2_1 ; CHECK-NEXT: @ %bb.2: @ %middle.block ; CHECK-NEXT: vctp.32 r12 ; CHECK-NEXT: 
vpsel q0, q0, q1 @@ -226,13 +225,8 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_ushort(i16 signext %a, i16* nocaptur ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: bxeq lr ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: adds r3, r2, #3 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: bic r3, r3, #3 -; CHECK-NEXT: sub.w r12, r3, #4 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w lr, r3, r12, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB3_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u32 q2, [r1] @@ -297,13 +291,8 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_int(i32 %a, i32* nocapture readonly ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: bxeq lr ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: adds r3, r2, #3 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: bic r3, r3, #3 -; CHECK-NEXT: sub.w r12, r3, #4 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w lr, r3, r12, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB4_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q2, [r1] @@ -392,13 +381,8 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly ; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: b .LBB5_9 ; CHECK-NEXT: .LBB5_4: @ %vector.ph -; CHECK-NEXT: add.w r7, r12, #3 -; CHECK-NEXT: movs r6, #1 -; CHECK-NEXT: bic r7, r7, #3 ; CHECK-NEXT: movs r4, #0 -; CHECK-NEXT: subs r7, #4 -; CHECK-NEXT: add.w lr, r6, r7, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB5_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r5, r0, r4 @@ -607,12 +591,7 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_short(i16* nocapture readon ; CHECK-NEXT: cmp.w r12, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, pc} -; CHECK-NEXT: add.w lr, r12, #3 -; CHECK-NEXT: movs r4, #1 -; CHECK-NEXT: bic lr, lr, #3 -; CHECK-NEXT: sub.w lr, lr, #4 -; CHECK-NEXT: 
add.w lr, r4, lr, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB6_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.s32 q0, [r0] @@ -703,13 +682,8 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonl ; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: b .LBB7_9 ; CHECK-NEXT: .LBB7_4: @ %vector.ph -; CHECK-NEXT: add.w r7, r12, #3 -; CHECK-NEXT: movs r6, #1 -; CHECK-NEXT: bic r7, r7, #3 ; CHECK-NEXT: movs r4, #0 -; CHECK-NEXT: subs r7, #4 -; CHECK-NEXT: add.w lr, r6, r7, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB7_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r5, r0, r4 @@ -918,12 +892,7 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_ushort(i16* nocapture reado ; CHECK-NEXT: cmp.w r12, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, pc} -; CHECK-NEXT: add.w lr, r12, #3 -; CHECK-NEXT: movs r4, #1 -; CHECK-NEXT: bic lr, lr, #3 -; CHECK-NEXT: sub.w lr, lr, #4 -; CHECK-NEXT: add.w lr, r4, lr, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB8_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u32 q0, [r0] @@ -1016,11 +985,7 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_int(i32* nocapture readonly ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: b .LBB9_8 ; CHECK-NEXT: .LBB9_4: @ %vector.ph -; CHECK-NEXT: add.w r4, r12, #3 -; CHECK-NEXT: bic r4, r4, #3 -; CHECK-NEXT: subs r4, #4 -; CHECK-NEXT: add.w lr, lr, r4, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB9_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0] @@ -1217,13 +1182,8 @@ define dso_local arm_aapcs_vfpcc void @test_v8i8_to_v8i16(i16* noalias nocapture ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, pc} -; CHECK-NEXT: add.w r12, r3, 
#7 -; CHECK-NEXT: mov.w lr, #1 -; CHECK-NEXT: bic r12, r12, #7 -; CHECK-NEXT: sub.w r12, r12, #8 -; CHECK-NEXT: add.w lr, lr, r12, lsr #3 ; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: dlstp.16 lr, lr +; CHECK-NEXT: dlstp.16 lr, r3 ; CHECK-NEXT: .LBB10_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add.w r4, r1, r12 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll index 02bf12ce62004..fdf04db82207f 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll @@ -9,13 +9,8 @@ define dso_local i32 @mul_reduce_add(i32* noalias nocapture readonly %a, i32* no ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: bxeq lr ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: adds r3, r2, #3 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: bic r3, r3, #3 -; CHECK-NEXT: sub.w r12, r3, #4 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w lr, r3, r12, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB0_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vmov q1, q0 @@ -82,13 +77,8 @@ define dso_local i32 @mul_reduce_add_const(i32* noalias nocapture readonly %a, i ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: bxeq lr ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: adds r1, r2, #3 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: bic r1, r1, #3 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: subs r1, #4 -; CHECK-NEXT: add.w lr, r3, r1, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB1_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: mov r1, r2 @@ -148,13 +138,8 @@ define dso_local i32 @add_reduce_add_const(i32* noalias nocapture readonly %a, i ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: bxeq lr ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: adds r1, r2, #3 -; CHECK-NEXT: movs r3, 
#1 -; CHECK-NEXT: bic r1, r1, #3 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: subs r1, #4 -; CHECK-NEXT: add.w lr, r3, r1, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB2_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: mov r1, r2 @@ -213,12 +198,7 @@ define dso_local void @vector_mul_const(i32* noalias nocapture %a, i32* noalias ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} -; CHECK-NEXT: add.w r12, r3, #3 -; CHECK-NEXT: mov.w lr, #1 -; CHECK-NEXT: bic r12, r12, #3 -; CHECK-NEXT: sub.w r12, r12, #4 -; CHECK-NEXT: add.w lr, lr, r12, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB3_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r1] @@ -272,12 +252,7 @@ define dso_local void @vector_add_const(i32* noalias nocapture %a, i32* noalias ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} -; CHECK-NEXT: add.w r12, r3, #3 -; CHECK-NEXT: mov.w lr, #1 -; CHECK-NEXT: bic r12, r12, #3 -; CHECK-NEXT: sub.w r12, r12, #4 -; CHECK-NEXT: add.w lr, lr, r12, lsr #2 -; CHECK-NEXT: dlstp.32 lr, lr +; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB4_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r1] @@ -331,13 +306,8 @@ define dso_local arm_aapcs_vfpcc void @vector_mul_vector_i8(i8* noalias nocaptur ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r4, pc} -; CHECK-NEXT: add.w r12, r3, #15 -; CHECK-NEXT: mov.w lr, #1 -; CHECK-NEXT: bic r12, r12, #15 -; CHECK-NEXT: sub.w r12, r12, #16 -; CHECK-NEXT: add.w lr, lr, r12, lsr #4 ; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: dlstp.8 lr, lr +; CHECK-NEXT: dlstp.8 lr, r3 ; CHECK-NEXT: .LBB5_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add.w r4, r1, r12 @@ -396,12 +366,7 @@ define dso_local arm_aapcs_vfpcc void 
@vector_mul_vector_i16(i16* noalias nocapt ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: popeq {r7, pc} -; CHECK-NEXT: add.w r12, r3, #7 -; CHECK-NEXT: mov.w lr, #1 -; CHECK-NEXT: bic r12, r12, #7 -; CHECK-NEXT: sub.w r12, r12, #8 -; CHECK-NEXT: add.w lr, lr, r12, lsr #3 -; CHECK-NEXT: dlstp.16 lr, lr +; CHECK-NEXT: dlstp.16 lr, r3 ; CHECK-NEXT: .LBB6_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q0, [r1] diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir index 69f23f6050131..99f6e39d3712e 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir @@ -195,12 +195,7 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -8 - ; CHECK: renamable $r12 = t2ADDri renamable $r3, 15, 14, $noreg, $noreg - ; CHECK: renamable $lr = t2MOVi 1, 14, $noreg, $noreg - ; CHECK: renamable $r12 = t2BICri killed renamable $r12, 15, 14, $noreg, $noreg - ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 16, 14, $noreg, $noreg - ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 35, 14, $noreg, $noreg - ; CHECK: $lr = MVE_WLSTP_8 renamable $lr, %bb.1 + ; CHECK: $lr = MVE_WLSTP_8 renamable $r3, %bb.1 ; CHECK: tB %bb.3, 14, $noreg ; CHECK: bb.1.vector.ph: ; CHECK: successors: %bb.2(0x80000000) @@ -323,12 +318,7 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 - ; CHECK: renamable $r12 = t2ADDri renamable $r3, 7, 14, $noreg, $noreg - ; CHECK: renamable $lr = t2MOVi 1, 14, $noreg, $noreg - ; CHECK: renamable $r12 = t2BICri killed renamable $r12, 7, 14, $noreg, $noreg - ; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 8, 14, 
$noreg, $noreg - ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 27, 14, $noreg, $noreg - ; CHECK: $lr = MVE_WLSTP_16 renamable $lr, %bb.1 + ; CHECK: $lr = MVE_WLSTP_16 renamable $r3, %bb.1 ; CHECK: tB %bb.2, 14, $noreg ; CHECK: bb.1.vector.body: ; CHECK: successors: %bb.2(0x04000000), %bb.1(0x7c000000) @@ -437,13 +427,8 @@ body: | ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 - ; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14, $noreg - ; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14, $noreg, $noreg - ; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14, $noreg, $noreg - ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14, $noreg - ; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14, $noreg, $noreg ; CHECK: renamable $r12 = t2MOVi 0, 14, $noreg, $noreg - ; CHECK: $lr = MVE_WLSTP_32 renamable $lr, %bb.1 + ; CHECK: $lr = MVE_WLSTP_32 $r2, %bb.1 ; CHECK: tB %bb.4, 14, $noreg ; CHECK: bb.1.vector.ph: ; CHECK: successors: %bb.2(0x80000000) From 4a649ad21aa282d08f90ae655369235c2aaf5ad5 Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Tue, 26 Nov 2019 10:21:20 +0000 Subject: [PATCH 022/591] [AArch64][SVE] Implement floating-point conversion intrinsics Summary: Adds intrinsics for the following: - fcvt - fcvtzs & fcvtzu - scvtf & ucvtf - fcvtlt, fcvtnt - fcvtx & fcvtxnt Reviewers: huntergr, sdesmalen, dancgr, mgudim, efriedma Reviewed By: sdesmalen Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, cameron.mcinally, cfe-commits, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70180 --- llvm/include/llvm/IR/IntrinsicsAArch64.td | 67 ++- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 76 ++-- llvm/lib/Target/AArch64/SVEInstrFormats.td | 29 +- .../AArch64/sve-intrinsics-fp-converts.ll | 400 
++++++++++++++++++ .../AArch64/sve2-intrinsics-fp-converts.ll | 84 ++++ 5 files changed, 614 insertions(+), 42 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll create mode 100644 llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 836911128ec46..c74b17c988959 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -849,6 +849,20 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". [LLVMVectorOfBitcastsToInt<0>], [IntrNoMem]>; + class AdvSIMD_SVE_FCVT_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyvector_ty], + [IntrNoMem]>; + + class AdvSIMD_SVE_FCVTZS_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMVectorOfBitcastsToInt<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyvector_ty], + [IntrNoMem]>; + class AdvSIMD_SVE_PUNPKHI_Intrinsic : Intrinsic<[LLVMHalfElementsVectorType<0>], [llvm_anyvector_ty], @@ -861,6 +875,13 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". 
LLVMVectorOfBitcastsToInt<0>], [IntrNoMem]>; + class AdvSIMD_SVE_SCVTF_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyvector_ty], + [IntrNoMem]>; + class AdvSIMD_SVE_TSMUL_Intrinsic : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, @@ -1048,6 +1069,16 @@ def int_aarch64_sve_fmaxnmv : AdvSIMD_SVE_Reduce_Intrinsic; def int_aarch64_sve_fminv : AdvSIMD_SVE_Reduce_Intrinsic; def int_aarch64_sve_fminnmv : AdvSIMD_SVE_Reduce_Intrinsic; +// +// Floating-point conversions +// + +def int_aarch64_sve_fcvt : AdvSIMD_SVE_FCVT_Intrinsic; +def int_aarch64_sve_fcvtzs : AdvSIMD_SVE_FCVTZS_Intrinsic; +def int_aarch64_sve_fcvtzu : AdvSIMD_SVE_FCVTZS_Intrinsic; +def int_aarch64_sve_scvtf : AdvSIMD_SVE_SCVTF_Intrinsic; +def int_aarch64_sve_ucvtf : AdvSIMD_SVE_SCVTF_Intrinsic; + // // Floating-point comparisons // @@ -1061,7 +1092,41 @@ def int_aarch64_sve_fcmpgt : AdvSIMD_SVE_Compare_Intrinsic; def int_aarch64_sve_fcmpne : AdvSIMD_SVE_Compare_Intrinsic; def int_aarch64_sve_fcmpuo : AdvSIMD_SVE_Compare_Intrinsic; -def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvtzs_i32f64 : Builtin_SVCVT<"svcvt_s32_f64_m", llvm_nxv4i32_ty, llvm_nxv2f64_ty>; +def int_aarch64_sve_fcvtzs_i64f16 : Builtin_SVCVT<"svcvt_s64_f16_m", llvm_nxv2i64_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvtzs_i64f32 : Builtin_SVCVT<"svcvt_s64_f32_m", llvm_nxv2i64_ty, llvm_nxv4f32_ty>; + +def int_aarch64_sve_fcvtzu_i32f16 : Builtin_SVCVT<"svcvt_u32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvtzu_i32f64 : Builtin_SVCVT<"svcvt_u32_f64_m", llvm_nxv4i32_ty, llvm_nxv2f64_ty>; +def int_aarch64_sve_fcvtzu_i64f16 : Builtin_SVCVT<"svcvt_u64_f16_m", llvm_nxv2i64_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvtzu_i64f32 : Builtin_SVCVT<"svcvt_u64_f32_m", 
llvm_nxv2i64_ty, llvm_nxv4f32_ty>; + +def int_aarch64_sve_fcvt_f16f32 : Builtin_SVCVT<"svcvt_f16_f32_m", llvm_nxv8f16_ty, llvm_nxv4f32_ty>; +def int_aarch64_sve_fcvt_f16f64 : Builtin_SVCVT<"svcvt_f16_f64_m", llvm_nxv8f16_ty, llvm_nxv2f64_ty>; +def int_aarch64_sve_fcvt_f32f64 : Builtin_SVCVT<"svcvt_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2f64_ty>; + +def int_aarch64_sve_fcvt_f32f16 : Builtin_SVCVT<"svcvt_f32_f16_m", llvm_nxv4f32_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvt_f64f16 : Builtin_SVCVT<"svcvt_f64_f16_m", llvm_nxv2f64_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvt_f64f32 : Builtin_SVCVT<"svcvt_f64_f32_m", llvm_nxv2f64_ty, llvm_nxv4f32_ty>; + +def int_aarch64_sve_fcvtlt_f32f16 : Builtin_SVCVT<"svcvtlt_f32_f16_m", llvm_nxv4f32_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvtlt_f64f32 : Builtin_SVCVT<"svcvtlt_f64_f32_m", llvm_nxv2f64_ty, llvm_nxv4f32_ty>; +def int_aarch64_sve_fcvtnt_f16f32 : Builtin_SVCVT<"svcvtnt_f16_f32_m", llvm_nxv8f16_ty, llvm_nxv4f32_ty>; +def int_aarch64_sve_fcvtnt_f32f64 : Builtin_SVCVT<"svcvtnt_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2f64_ty>; + +def int_aarch64_sve_fcvtx_f32f64 : Builtin_SVCVT<"svcvtx_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2f64_ty>; +def int_aarch64_sve_fcvtxnt_f32f64 : Builtin_SVCVT<"svcvtxnt_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2f64_ty>; + +def int_aarch64_sve_scvtf_f16i32 : Builtin_SVCVT<"svcvt_f16_s32_m", llvm_nxv8f16_ty, llvm_nxv4i32_ty>; +def int_aarch64_sve_scvtf_f16i64 : Builtin_SVCVT<"svcvt_f16_s64_m", llvm_nxv8f16_ty, llvm_nxv2i64_ty>; +def int_aarch64_sve_scvtf_f32i64 : Builtin_SVCVT<"svcvt_f32_s64_m", llvm_nxv4f32_ty, llvm_nxv2i64_ty>; +def int_aarch64_sve_scvtf_f64i32 : Builtin_SVCVT<"svcvt_f64_s32_m", llvm_nxv2f64_ty, llvm_nxv4i32_ty>; + +def int_aarch64_sve_ucvtf_f16i32 : Builtin_SVCVT<"svcvt_f16_u32_m", llvm_nxv8f16_ty, llvm_nxv4i32_ty>; +def int_aarch64_sve_ucvtf_f16i64 : Builtin_SVCVT<"svcvt_f16_u64_m", llvm_nxv8f16_ty, llvm_nxv2i64_ty>; +def int_aarch64_sve_ucvtf_f32i64 : Builtin_SVCVT<"svcvt_f32_u64_m", 
llvm_nxv4f32_ty, llvm_nxv2i64_ty>; +def int_aarch64_sve_ucvtf_f64i32 : Builtin_SVCVT<"svcvt_f64_u32_m", llvm_nxv2f64_ty, llvm_nxv4i32_ty>; // // Predicate operations diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index a4ea2cab13eba..3b4e97ed844af 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -889,40 +889,40 @@ let Predicates = [HasSVE] in { defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr">; defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl">; - def FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, ElementSizeS>; - def FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, ElementSizeS>; - def SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", ZPR16, ZPR16, ElementSizeH>; - def SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, ElementSizeS>; - def UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, ElementSizeS>; - def UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, ElementSizeH>; - def FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, ElementSizeH>; - def FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, ElementSizeS>; - def FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, ElementSizeH>; - def FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, ElementSizeS>; - def FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, ElementSizeD>; - def FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, ElementSizeD>; - def FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, ElementSizeD>; - def FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, ElementSizeD>; - def SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, ElementSizeD>; - def UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", 
ZPR32, ZPR64, ElementSizeD>; - def UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", ZPR32, ZPR16, ElementSizeS>; - def SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, ElementSizeD>; - def SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, ElementSizeS>; - def SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, ElementSizeD>; - def UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, ElementSizeD>; - def UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, ElementSizeD>; - def SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, ElementSizeD>; - def UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, ElementSizeD>; - def FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, ElementSizeD>; - def FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, ElementSizeD>; - def FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, ElementSizeD>; - def FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, ElementSizeS>; - def FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, ElementSizeD>; - def FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, ElementSizeS>; - def FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, ElementSizeD>; - def FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, ElementSizeD>; - def FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, ElementSizeD>; - def FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, ElementSizeD>; + defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, nxv8f16, nxv16i1, nxv4f32, ElementSizeS>; + defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, nxv4f32, nxv16i1, nxv8f16, ElementSizeS>; + defm SCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110010, "scvtf", 
ZPR16, ZPR16, int_aarch64_sve_scvtf, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; + defm SCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010100, "scvtf", ZPR32, ZPR32, int_aarch64_sve_scvtf, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; + defm UCVTF_ZPmZ_StoS : sve_fp_2op_p_zd<0b1010101, "ucvtf", ZPR32, ZPR32, int_aarch64_sve_ucvtf, nxv4f32, nxv4i1, nxv4i32, ElementSizeS>; + defm UCVTF_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0110011, "ucvtf", ZPR16, ZPR16, int_aarch64_sve_ucvtf, nxv8f16, nxv8i1, nxv8i16, ElementSizeH>; + defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, int_aarch64_sve_fcvtzs, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; + defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, int_aarch64_sve_fcvtzs, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; + defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, int_aarch64_sve_fcvtzu, nxv8i16, nxv8i1, nxv8f16, ElementSizeH>; + defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, int_aarch64_sve_fcvtzu, nxv4i32, nxv4i1, nxv4f32, ElementSizeS>; + defm FCVT_ZPmZ_DtoH : sve_fp_2op_p_zd<0b1101000, "fcvt", ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64, nxv8f16, nxv16i1, nxv2f64, ElementSizeD>; + defm FCVT_ZPmZ_HtoD : sve_fp_2op_p_zd<0b1101001, "fcvt", ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16, nxv2f64, nxv16i1, nxv8f16, ElementSizeD>; + defm FCVT_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1101010, "fcvt", ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64, nxv4f32, nxv16i1, nxv2f64, ElementSizeD>; + defm FCVT_ZPmZ_StoD : sve_fp_2op_p_zd<0b1101011, "fcvt", ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32, nxv2f64, nxv16i1, nxv4f32, ElementSizeD>; + defm SCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110000, "scvtf", ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32, nxv2f64, nxv16i1, nxv4i32, ElementSizeD>; + defm UCVTF_ZPmZ_StoD : sve_fp_2op_p_zd<0b1110001, "ucvtf", ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32, nxv2f64, nxv16i1, nxv4i32, ElementSizeD>; + defm UCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110101, "ucvtf", 
ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32, nxv8f16, nxv16i1, nxv4i32, ElementSizeS>; + defm SCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110100, "scvtf", ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64, nxv4f32, nxv16i1, nxv2i64, ElementSizeD>; + defm SCVTF_ZPmZ_StoH : sve_fp_2op_p_zd<0b0110100, "scvtf", ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32, nxv8f16, nxv16i1, nxv4i32, ElementSizeS>; + defm SCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110110, "scvtf", ZPR64, ZPR16, int_aarch64_sve_scvtf_f16i64, nxv8f16, nxv16i1, nxv2i64, ElementSizeD>; + defm UCVTF_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1110101, "ucvtf", ZPR64, ZPR32, int_aarch64_sve_ucvtf_f32i64, nxv4f32, nxv16i1, nxv2i64, ElementSizeD>; + defm UCVTF_ZPmZ_DtoH : sve_fp_2op_p_zd<0b0110111, "ucvtf", ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64, nxv8f16, nxv16i1, nxv2i64, ElementSizeD>; + defm SCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110110, "scvtf", ZPR64, ZPR64, int_aarch64_sve_scvtf, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; + defm UCVTF_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1110111, "ucvtf", ZPR64, ZPR64, int_aarch64_sve_ucvtf, nxv2f64, nxv2i1, nxv2i64, ElementSizeD>; + defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64, nxv4i32, nxv16i1, nxv2f64, ElementSizeD>; + defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64, nxv4i32, nxv16i1, nxv2f64, ElementSizeD>; + defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32, nxv2i64, nxv16i1, nxv4f32, ElementSizeD>; + defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16, nxv4i32, nxv16i1, nxv8f16, ElementSizeS>; + defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16, nxv2i64, nxv16i1, nxv8f16, ElementSizeD>; + defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16, nxv4i32, nxv16i1, nxv8f16, ElementSizeS>; 
+ defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16, nxv2i64, nxv16i1, nxv8f16, ElementSizeD>; + defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32, nxv2i64, nxv16i1, nxv4f32, ElementSizeD>; + defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, int_aarch64_sve_fcvtzs, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; + defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, int_aarch64_sve_fcvtzu, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>; defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", int_aarch64_sve_frintn>; defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", int_aarch64_sve_frintp>; @@ -1422,10 +1422,10 @@ let Predicates = [HasSVE2] in { defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">; // SVE2 floating-point convert precision - defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtxnt">; - defm FCVTNT_ZPmZ : sve2_fp_convert_down_narrow<"fcvtnt">; - defm FCVTLT_ZPmZ : sve2_fp_convert_up_long<"fcvtlt">; - def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>; + defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding_top<"fcvtxnt", "int_aarch64_sve_fcvtxnt">; + defm FCVTX_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtx", "int_aarch64_sve_fcvtx">; + defm FCVTNT_ZPmZ : sve2_fp_convert_down_narrow<"fcvtnt", "int_aarch64_sve_fcvtnt">; + defm FCVTLT_ZPmZ : sve2_fp_convert_up_long<"fcvtlt", "int_aarch64_sve_fcvtlt">; // SVE2 floating-point pairwise operations defm FADDP_ZPmZZ : sve2_fp_pairwise_pred<0b000, "faddp">; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 855510e7f5568..38f318849b33e 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -1633,18 +1633,26 @@ class sve2_fp_convert_precision opc, string asm, let Constraints = "$Zd = $_Zd"; } -multiclass 
sve2_fp_convert_down_narrow { +multiclass sve2_fp_convert_down_narrow { def _StoH : sve2_fp_convert_precision<0b1000, asm, ZPR16, ZPR32>; def _DtoS : sve2_fp_convert_precision<0b1110, asm, ZPR32, ZPR64>; + + def : SVE_3_Op_Pat(op # _f16f32), nxv8f16, nxv16i1, nxv4f32, !cast(NAME # _StoH)>; + def : SVE_3_Op_Pat(op # _f32f64), nxv4f32, nxv16i1, nxv2f64, !cast(NAME # _DtoS)>; } -multiclass sve2_fp_convert_up_long { +multiclass sve2_fp_convert_up_long { def _HtoS : sve2_fp_convert_precision<0b1001, asm, ZPR32, ZPR16>; def _StoD : sve2_fp_convert_precision<0b1111, asm, ZPR64, ZPR32>; + + def : SVE_3_Op_Pat(op # _f32f16), nxv4f32, nxv16i1, nxv8f16, !cast(NAME # _HtoS)>; + def : SVE_3_Op_Pat(op # _f64f32), nxv2f64, nxv16i1, nxv4f32, !cast(NAME # _StoD)>; } -multiclass sve2_fp_convert_down_odd_rounding { +multiclass sve2_fp_convert_down_odd_rounding_top { def _DtoS : sve2_fp_convert_precision<0b0010, asm, ZPR32, ZPR64>; + + def : SVE_3_Op_Pat(op # _f32f64), nxv4f32, nxv16i1, nxv2f64, !cast(NAME # _DtoS)>; } //===----------------------------------------------------------------------===// @@ -1830,6 +1838,16 @@ class sve_fp_2op_p_zd opc, string asm, RegisterOperand i_zprtype, let ElementSize = size; } +multiclass sve_fp_2op_p_zd opc, string asm, + RegisterOperand i_zprtype, + RegisterOperand o_zprtype, + SDPatternOperator op, ValueType vt1, + ValueType vt2, ValueType vt3, ElementSizeEnum Sz> { + def NAME : sve_fp_2op_p_zd; + + def : SVE_3_Op_Pat(NAME)>; +} + multiclass sve_fp_2op_p_zd_HSD opc, string asm, SDPatternOperator op> { def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>; def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>; @@ -1846,6 +1864,11 @@ multiclass sve2_fp_flogb { def _D : sve_fp_2op_p_zd<0b0011110, asm, ZPR64, ZPR64, ElementSizeD>; } +multiclass sve2_fp_convert_down_odd_rounding { + def _DtoS : sve_fp_2op_p_zd<0b0001010, asm, ZPR64, ZPR32, ElementSizeD>; + def : SVE_3_Op_Pat(op # _f32f64), nxv4f32, nxv16i1, 
nxv2f64, !cast(NAME # _DtoS)>; +} + //===----------------------------------------------------------------------===// // SVE Floating Point Unary Operations - Unpredicated Group //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll new file mode 100644 index 0000000000000..e777a2f3b8b04 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll @@ -0,0 +1,400 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +; +; FCVT +; + +define @fcvt_f16_f32( %a, %pg, %b) { +; CHECK-LABEL: fcvt_f16_f32: +; CHECK: fcvt z0.h, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvt.f16f32( %a, + %pg, + %b) + ret %out +} + +define @fcvt_f16_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvt_f16_f64: +; CHECK: fcvt z0.h, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvt.f16f64( %a, + %pg, + %b) + ret %out +} + +define @fcvt_f32_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvt_f32_f16: +; CHECK: fcvt z0.s, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvt.f32f16( %a, + %pg, + %b) + ret %out +} + +define @fcvt_f32_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvt_f32_f64: +; CHECK: fcvt z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvt.f32f64( %a, + %pg, + %b) + ret %out +} + +define @fcvt_f64_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvt_f64_f16: +; CHECK: fcvt z0.d, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvt.f64f16( %a, + %pg, + %b) + ret %out +} + +define @fcvt_f64_f32( %a, %pg, %b) { +; CHECK-LABEL: fcvt_f64_f32: +; CHECK: fcvt z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvt.f64f32( %a, + %pg, + %b) + ret %out +} + +; +; FCVTZS +; + +define @fcvtzs_i16_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvtzs_i16_f16: +; CHECK: fcvtzs z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call 
@llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16( %a, + %pg, + %b) + ret %out +} + +define @fcvtzs_i32_f32( %a, %pg, %b) { +; CHECK-LABEL: fcvtzs_i32_f32: +; CHECK: fcvtzs z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32( %a, + %pg, + %b) + ret %out +} + +define @fcvtzs_i64_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvtzs_i64_f64: +; CHECK: fcvtzs z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64( %a, + %pg, + %b) + ret %out +} + +define @fcvtzs_i32_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvtzs_i32_f16: +; CHECK: fcvtzs z0.s, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzs.i32f16( %a, + %pg, + %b) + ret %out +} + +define @fcvtzs_i32_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvtzs_i32_f64: +; CHECK: fcvtzs z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzs.i32f64( %a, + %pg, + %b) + ret %out +} + +define @fcvtzs_i64_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvtzs_i64_f16: +; CHECK: fcvtzs z0.d, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzs.i64f16( %a, + %pg, + %b) + ret %out +} + +define @fcvtzs_i64_f32( %a, %pg, %b) { +; CHECK-LABEL: fcvtzs_i64_f32: +; CHECK: fcvtzs z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzs.i64f32( %a, + %pg, + %b) + ret %out +} + +; +; FCVTZU +; + +define @fcvtzu_i16_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvtzu_i16_f16: +; CHECK: fcvtzu z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16( %a, + %pg, + %b) + ret %out +} + +define @fcvtzu_i32_f32( %a, %pg, %b) { +; CHECK-LABEL: fcvtzu_i32_f32: +; CHECK: fcvtzu z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32( %a, + %pg, + %b) + ret %out +} + +define @fcvtzu_i64_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvtzu_i64_f64: +; CHECK: fcvtzu z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64( %a, + %pg, + %b) + ret %out +} + +define 
@fcvtzu_i32_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvtzu_i32_f16: +; CHECK: fcvtzu z0.s, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzu.i32f16( %a, + %pg, + %b) + ret %out +} + +define @fcvtzu_i32_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvtzu_i32_f64: +; CHECK: fcvtzu z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzu.i32f64( %a, + %pg, + %b) + ret %out +} + +define @fcvtzu_i64_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvtzu_i64_f16: +; CHECK: fcvtzu z0.d, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzu.i64f16( %a, + %pg, + %b) + ret %out +} + +define @fcvtzu_i64_f32( %a, %pg, %b) { +; CHECK-LABEL: fcvtzu_i64_f32: +; CHECK: fcvtzu z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtzu.i64f32( %a, + %pg, + %b) + ret %out +} + +; +; SCVTF +; + +define @scvtf_f16_i16( %a, %pg, %b) { +; CHECK-LABEL: scvtf_f16_i16: +; CHECK: scvtf z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16( %a, + %pg, + %b) + ret %out +} + +define @scvtf_f32_i32( %a, %pg, %b) { +; CHECK-LABEL: scvtf_f32_i32: +; CHECK: scvtf z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32( %a, + %pg, + %b) + ret %out +} + +define @scvtf_f64_i64( %a, %pg, %b) { +; CHECK-LABEL: scvtf_f64_i64: +; CHECK: scvtf z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64( %a, + %pg, + %b) + ret %out +} + +define @scvtf_f16_i32( %a, %pg, %b) { +; CHECK-LABEL: scvtf_f16_i32: +; CHECK: scvtf z0.h, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.scvtf.f16i32( %a, + %pg, + %b) + ret %out +} + +define @scvtf_f16_i64( %a, %pg, %b) { +; CHECK-LABEL: scvtf_f16_i64: +; CHECK: scvtf z0.h, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.scvtf.f16i64( %a, + %pg, + %b) + ret %out +} + +define @scvtf_f32_i64( %a, %pg, %b) { +; CHECK-LABEL: scvtf_f32_i64: +; CHECK: scvtf z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %out = call 
@llvm.aarch64.sve.scvtf.f32i64( %a, + %pg, + %b) + ret %out +} + +define @scvtf_f64_i32( %a, %pg, %b) { +; CHECK-LABEL: scvtf_f64_i32: +; CHECK: scvtf z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.scvtf.f64i32( %a, + %pg, + %b) + ret %out +} + +; +; UCVTF +; + +define @ucvtf_f16_i16( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_f16_i16: +; CHECK: ucvtf z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16( %a, + %pg, + %b) + ret %out +} + +define @ucvtf_f32_i32( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_f32_i32: +; CHECK: ucvtf z0.s, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32( %a, + %pg, + %b) + ret %out +} + +define @ucvtf_f64_i64( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_f64_i64: +; CHECK: ucvtf z0.d, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64( %a, + %pg, + %b) + ret %out +} + +define @ucvtf_f16_i32( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_f16_i32: +; CHECK: ucvtf z0.h, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ucvtf.f16i32( %a, + %pg, + %b) + ret %out +} + +define @ucvtf_f16_i64( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_f16_i64: +; CHECK: ucvtf z0.h, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ucvtf.f16i64( %a, + %pg, + %b) + ret %out +} + +define @ucvtf_f32_i64( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_f32_i64: +; CHECK: ucvtf z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ucvtf.f32i64( %a, + %pg, + %b) + ret %out +} + +define @ucvtf_f64_i32( %a, %pg, %b) { +; CHECK-LABEL: ucvtf_f64_i32: +; CHECK: ucvtf z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.ucvtf.f64i32( %a, + %pg, + %b) + ret %out +} + +declare @llvm.aarch64.sve.fcvt.f16f32(, , ) +declare @llvm.aarch64.sve.fcvt.f16f64(, , ) +declare @llvm.aarch64.sve.fcvt.f32f16(, , ) +declare @llvm.aarch64.sve.fcvt.f32f64(, , ) +declare @llvm.aarch64.sve.fcvt.f64f16(, , ) +declare @llvm.aarch64.sve.fcvt.f64f32(, , ) + +declare 
@llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(, , ) +declare @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(, , ) +declare @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64(, , ) +declare @llvm.aarch64.sve.fcvtzs.i32f16(, , ) +declare @llvm.aarch64.sve.fcvtzs.i32f64(, , ) +declare @llvm.aarch64.sve.fcvtzs.i64f16(, , ) +declare @llvm.aarch64.sve.fcvtzs.i64f32(, , ) + +declare @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(, , ) +declare @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(, , ) +declare @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64(, , ) +declare @llvm.aarch64.sve.fcvtzu.i32f16(, , ) +declare @llvm.aarch64.sve.fcvtzu.i32f64(, , ) +declare @llvm.aarch64.sve.fcvtzu.i64f16(, , ) +declare @llvm.aarch64.sve.fcvtzu.i64f32(, , ) + +declare @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(, , ) +declare @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(, , ) +declare @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64(, , ) +declare @llvm.aarch64.sve.scvtf.f16i32(, , ) +declare @llvm.aarch64.sve.scvtf.f16i64(, , ) +declare @llvm.aarch64.sve.scvtf.f32i64(, , ) +declare @llvm.aarch64.sve.scvtf.f64i32(, , ) + +declare @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(, , ) +declare @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32(, , ) +declare @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64(, , ) +declare @llvm.aarch64.sve.ucvtf.f16i32(, , ) +declare @llvm.aarch64.sve.ucvtf.f16i64(, , ) +declare @llvm.aarch64.sve.ucvtf.f32i64(, , ) +declare @llvm.aarch64.sve.ucvtf.f64i32(, , ) diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll new file mode 100644 index 0000000000000..4d110fee41c9f --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll @@ -0,0 +1,84 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s + +; +; FCVTLT +; + +define @fcvtlt_f32_f16( %a, %pg, %b) { +; CHECK-LABEL: fcvtlt_f32_f16: +; CHECK: fcvtlt z0.s, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtlt.f32f16( %a, + %pg, + %b) + ret %out +} + +define 
@fcvtlt_f64_f32( %a, %pg, %b) { +; CHECK-LABEL: fcvtlt_f64_f32: +; CHECK: fcvtlt z0.d, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtlt.f64f32( %a, + %pg, + %b) + ret %out +} + +; +; FCVTNT +; + +define @fcvtnt_f16_f32( %a, %pg, %b) { +; CHECK-LABEL: fcvtnt_f16_f32: +; CHECK: fcvtnt z0.h, p0/m, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtnt.f16f32( %a, + %pg, + %b) + ret %out +} + +define @fcvtnt_f32_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvtnt_f32_f64: +; CHECK: fcvtnt z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtnt.f32f64( %a, + %pg, + %b) + ret %out +} + +; +; FCVTX +; + +define @fcvtx_f32_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvtx_f32_f64: +; CHECK: fcvtx z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtx.f32f64( %a, + %pg, + %b) + ret %out +} + +; +; FCVTXNT +; + +define @fcvtxnt_f32_f64( %a, %pg, %b) { +; CHECK-LABEL: fcvtxnt_f32_f64: +; CHECK: fcvtxnt z0.s, p0/m, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.fcvtxnt.f32f64( %a, + %pg, + %b) + ret %out +} + +declare @llvm.aarch64.sve.fcvtlt.f32f16(, , ) +declare @llvm.aarch64.sve.fcvtlt.f64f32(, , ) +declare @llvm.aarch64.sve.fcvtnt.f16f32(, , ) +declare @llvm.aarch64.sve.fcvtnt.f32f64(, , ) +declare @llvm.aarch64.sve.fcvtx.f32f64(, , ) +declare @llvm.aarch64.sve.fcvtxnt.f32f64(, , ) From e54c83ec4dd493f2c6a483be2f6f3fc93624d10a Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 26 Nov 2019 10:44:49 +0000 Subject: [PATCH 023/591] [OpenCL] Add work-group and miscellaneous vector builtin functions Add the work-group and miscellaneous vector builtin functions from the OpenCL C specification. Patch by Pierre Gondois and Sven van Haastregt. 
--- clang/lib/Sema/OpenCLBuiltins.td | 57 ++++++++++++++++++- .../SemaOpenCL/fdeclare-opencl-builtins.cl | 8 +++ 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/clang/lib/Sema/OpenCLBuiltins.td b/clang/lib/Sema/OpenCLBuiltins.td index 0bd4c51a04c2a..353e0c1d8c8d2 100644 --- a/clang/lib/Sema/OpenCLBuiltins.td +++ b/clang/lib/Sema/OpenCLBuiltins.td @@ -274,14 +274,21 @@ def Event : Type<"Event", QualType<"OCLEventTy">>; def VecAndScalar: IntList<"VecAndScalar", [1, 2, 3, 4, 8, 16]>; def VecNoScalar : IntList<"VecNoScalar", [2, 3, 4, 8, 16]>; def Vec1 : IntList<"Vec1", [1]>; +def Vec2 : IntList<"Vec2", [2]>; +def Vec4 : IntList<"Vec4", [4]>; +def Vec8 : IntList<"Vec8", [8]>; +def Vec16 : IntList<"Vec16", [16]>; def Vec1234 : IntList<"Vec1234", [1, 2, 3, 4]>; // Type lists. -def TLAll : TypeList<"TLAll", [Char, UChar, Short, UShort, Int, UInt, Long, ULong, Float, Double, Half]>; +def TLAll : TypeList<"TLAll", [Char, UChar, Short, UShort, Int, UInt, Long, ULong, Float, Double, Half]>; +def TLAllUnsigned : TypeList<"TLAllUnsigned", [UChar, UChar, UShort, UShort, UInt, UInt, ULong, ULong, UInt, ULong, UShort]>; def TLFloat : TypeList<"TLFloat", [Float, Double, Half]>; def TLSignedInts : TypeList<"TLSignedInts", [Char, Short, Int, Long]>; def TLUnsignedInts : TypeList<"TLUnsignedInts", [UChar, UShort, UInt, ULong]>; +def TLIntLongFloats : TypeList<"TLIntLongFloats", [Int, UInt, Long, ULong, Float, Double, Half]>; + // All unsigned integer types twice, to facilitate unsigned return types for e.g. // uchar abs(char) and // uchar abs(uchar). @@ -306,6 +313,8 @@ def SGenTypeN : GenericType<"SGenTypeN", TLSignedInts, VecAndScalar def UGenTypeN : GenericType<"UGenTypeN", TLUnsignedInts, VecAndScalar>; // Float def FGenTypeN : GenericType<"FGenTypeN", TLFloat, VecAndScalar>; +// (u)int, (u)long, and all floats +def IntLongFloatGenType1 : GenericType<"IntLongFloatGenType1", TLIntLongFloats, Vec1>; // GenType definitions for every single base type (e.g. fp32 only). 
// Names are like: GenTypeFloatVecAndScalar. @@ -867,6 +876,31 @@ foreach Type = [Int, UInt] in { } } +//-------------------------------------------------------------------- +// OpenCL v1.1 s6.11.12, v1.2 s6.12.12, v2.0 s6.13.12 - Miscellaneous Vector Functions +// --- Table 19 --- +foreach name = ["shuffle"] in { + foreach VSize1 = [Vec2, Vec4, Vec8, Vec16] in { + foreach VSize2 = [Vec2, Vec4, Vec8, Vec16] in { + def : Builtin, + GenericType<"TLAll" # VSize2.Name, TLAll, VSize2>, + GenericType<"TLAllUnsigned" # VSize1.Name, TLAllUnsigned, VSize1>], + Attr.Const>; + } + } +} +foreach name = ["shuffle2"] in { + foreach VSize1 = [Vec2, Vec4, Vec8, Vec16] in { + foreach VSize2 = [Vec2, Vec4, Vec8, Vec16] in { + def : Builtin, + GenericType<"TLAll" # VSize2.Name, TLAll, VSize2>, + GenericType<"TLAll" # VSize2.Name, TLAll, VSize2>, + GenericType<"TLAllUnsigned" # VSize1.Name, TLAllUnsigned, VSize1>], + Attr.Const>; + } + } +} + //-------------------------------------------------------------------- // OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14: Image Read and Write Functions // OpenCL Extension v2.0 s5.1.8 and s6.1.8: Image Read and Write Functions @@ -1020,6 +1054,27 @@ foreach aQual = ["WO", "RW"] in { } +//-------------------------------------------------------------------- +// OpenCL v2.0 s6.13.15 - Work-group Functions +// --- Table 26 --- +let MinVersion = CL20 in { + foreach name = ["work_group_all", "work_group_any"] in { + def : Builtin; + } + foreach name = ["work_group_broadcast"] in { + def : Builtin; + def : Builtin; + def : Builtin; + } + foreach op = ["add", "min", "max"] in { + foreach name = ["work_group_reduce_", "work_group_scan_exclusive_", + "work_group_scan_inclusive_"] in { + def : Builtin; + } + } +} + + // OpenCL v2.0 s9.17.3: Additions to section 6.13.1: Work-Item Functions let MinVersion = CL20 in { let Extension = "cl_khr_subgroups" in { diff --git a/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl 
b/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl index 97a01a1fe9311..589d04c64e82d 100644 --- a/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl +++ b/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl @@ -32,6 +32,7 @@ typedef float float4 __attribute__((ext_vector_type(4))); typedef half half4 __attribute__((ext_vector_type(4))); typedef int int2 __attribute__((ext_vector_type(2))); typedef int int4 __attribute__((ext_vector_type(4))); +typedef uint uint4 __attribute__((ext_vector_type(4))); typedef long long2 __attribute__((ext_vector_type(2))); #endif @@ -67,6 +68,13 @@ char4 test_int(char c, char4 c4) { return max(c4, c); } +kernel void basic_vector_misc(float4 a) { + float4 res; + uint4 mask = (uint4)(1, 2, 3, 4); + + res = shuffle(a, mask); +} + kernel void basic_image_readonly(read_only image2d_t image_read_only_image2d) { int2 i2; sampler_t sampler; From e8013ef53ac0cd82f9c921abd0b2fa1aa8b2f20c Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Tue, 26 Nov 2019 11:27:22 +0100 Subject: [PATCH 024/591] [lldb][NFC] Extract array type parsing from DWARFASTParserClang::ParseTypeFromDWARF Part of the work to split up this monolithic parsing function. 
--- .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 176 +++++++++--------- .../SymbolFile/DWARF/DWARFASTParserClang.h | 3 + 2 files changed, 95 insertions(+), 84 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 6d02f1b5ee833..ea0f02778941c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -1201,90 +1201,9 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, } break; case DW_TAG_array_type: { - DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(), - DW_TAG_value_to_name(tag), type_name_cstr); - - DWARFDIE type_die = attrs.type.Reference(); - Type *element_type = dwarf->ResolveTypeUID(type_die, true); - - if (element_type) { - auto array_info = ParseChildArrayInfo(die); - if (array_info) { - attrs.byte_stride = array_info->byte_stride; - attrs.bit_stride = array_info->bit_stride; - } - if (attrs.byte_stride == 0 && attrs.bit_stride == 0) - attrs.byte_stride = element_type->GetByteSize().getValueOr(0); - CompilerType array_element_type = element_type->GetForwardCompilerType(); - - if (ClangASTContext::IsCXXClassType(array_element_type) && - !array_element_type.GetCompleteType()) { - ModuleSP module_sp = die.GetModule(); - if (module_sp) { - if (die.GetCU()->GetProducer() == eProducerClang) - module_sp->ReportError( - "DWARF DW_TAG_array_type DIE at 0x%8.8x has a " - "class/union/struct element type DIE 0x%8.8x that is a " - "forward declaration, not a complete definition.\nTry " - "compiling the source file with -fstandalone-debug or " - "disable -gmodules", - die.GetOffset(), type_die.GetOffset()); - else - module_sp->ReportError( - "DWARF DW_TAG_array_type DIE at 0x%8.8x has a " - "class/union/struct element type DIE 0x%8.8x that is a " - "forward declaration, not a complete definition.\nPlease " - "file a bug against the compiler and 
include the " - "preprocessed output for %s", - die.GetOffset(), type_die.GetOffset(), - GetUnitName(die).c_str()); - } - - // We have no choice other than to pretend that the element class - // type is complete. If we don't do this, clang will crash when - // trying to layout the class. Since we provide layout - // assistance, all ivars in this class and other classes will be - // fine, this is the best we can do short of crashing. - if (ClangASTContext::StartTagDeclarationDefinition( - array_element_type)) { - ClangASTContext::CompleteTagDeclarationDefinition(array_element_type); - } else { - module_sp->ReportError("DWARF DIE at 0x%8.8x was not able to " - "start its definition.\nPlease file a " - "bug and attach the file at the start " - "of this error message", - type_die.GetOffset()); - } - } - - uint64_t array_element_bit_stride = - attrs.byte_stride * 8 + attrs.bit_stride; - if (array_info && array_info->element_orders.size() > 0) { - uint64_t num_elements = 0; - auto end = array_info->element_orders.rend(); - for (auto pos = array_info->element_orders.rbegin(); pos != end; - ++pos) { - num_elements = *pos; - clang_type = m_ast.CreateArrayType(array_element_type, num_elements, - attrs.is_vector); - array_element_type = clang_type; - array_element_bit_stride = - num_elements ? 
array_element_bit_stride * num_elements - : array_element_bit_stride; - } - } else { - clang_type = m_ast.CreateArrayType(array_element_type, 0, attrs.is_vector); - } - ConstString empty_name; - type_sp = std::make_shared( - die.GetID(), dwarf, empty_name, array_element_bit_stride / 8, nullptr, - dwarf->GetUID(type_die), Type::eEncodingIsUID, &attrs.decl, - clang_type, Type::ResolveState::Full); - type_sp->SetEncodingType(element_type); - m_ast.SetMetadataAsUserID(clang_type.GetOpaqueQualType(), die.GetID()); - } - } break; - + type_sp = ParseArrayType(die, attrs); + break; + } case DW_TAG_ptr_to_member_type: { type_sp = ParsePointerToMemberType(die, attrs); break; @@ -1303,6 +1222,95 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, return UpdateSymbolContextScopeForType(sc, die, type_sp); } +TypeSP DWARFASTParserClang::ParseArrayType(const DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs) { + SymbolFileDWARF *dwarf = die.GetDWARF(); + + DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(), + DW_TAG_value_to_name(tag), type_name_cstr); + + DWARFDIE type_die = attrs.type.Reference(); + Type *element_type = dwarf->ResolveTypeUID(type_die, true); + + if (element_type) { + auto array_info = ParseChildArrayInfo(die); + if (array_info) { + attrs.byte_stride = array_info->byte_stride; + attrs.bit_stride = array_info->bit_stride; + } + if (attrs.byte_stride == 0 && attrs.bit_stride == 0) + attrs.byte_stride = element_type->GetByteSize().getValueOr(0); + CompilerType array_element_type = element_type->GetForwardCompilerType(); + + if (ClangASTContext::IsCXXClassType(array_element_type) && + !array_element_type.GetCompleteType()) { + ModuleSP module_sp = die.GetModule(); + if (module_sp) { + if (die.GetCU()->GetProducer() == eProducerClang) + module_sp->ReportError( + "DWARF DW_TAG_array_type DIE at 0x%8.8x has a " + "class/union/struct element type DIE 0x%8.8x that is a " + "forward declaration, not a complete definition.\nTry " + 
"compiling the source file with -fstandalone-debug or " + "disable -gmodules", + die.GetOffset(), type_die.GetOffset()); + else + module_sp->ReportError( + "DWARF DW_TAG_array_type DIE at 0x%8.8x has a " + "class/union/struct element type DIE 0x%8.8x that is a " + "forward declaration, not a complete definition.\nPlease " + "file a bug against the compiler and include the " + "preprocessed output for %s", + die.GetOffset(), type_die.GetOffset(), GetUnitName(die).c_str()); + } + + // We have no choice other than to pretend that the element class + // type is complete. If we don't do this, clang will crash when + // trying to layout the class. Since we provide layout + // assistance, all ivars in this class and other classes will be + // fine, this is the best we can do short of crashing. + if (ClangASTContext::StartTagDeclarationDefinition(array_element_type)) { + ClangASTContext::CompleteTagDeclarationDefinition(array_element_type); + } else { + module_sp->ReportError("DWARF DIE at 0x%8.8x was not able to " + "start its definition.\nPlease file a " + "bug and attach the file at the start " + "of this error message", + type_die.GetOffset()); + } + } + + uint64_t array_element_bit_stride = + attrs.byte_stride * 8 + attrs.bit_stride; + CompilerType clang_type; + if (array_info && array_info->element_orders.size() > 0) { + uint64_t num_elements = 0; + auto end = array_info->element_orders.rend(); + for (auto pos = array_info->element_orders.rbegin(); pos != end; ++pos) { + num_elements = *pos; + clang_type = m_ast.CreateArrayType(array_element_type, num_elements, + attrs.is_vector); + array_element_type = clang_type; + array_element_bit_stride = num_elements + ? 
array_element_bit_stride * num_elements + : array_element_bit_stride; + } + } else { + clang_type = + m_ast.CreateArrayType(array_element_type, 0, attrs.is_vector); + } + ConstString empty_name; + TypeSP type_sp = std::make_shared( + die.GetID(), dwarf, empty_name, array_element_bit_stride / 8, nullptr, + dwarf->GetUID(type_die), Type::eEncodingIsUID, &attrs.decl, clang_type, + Type::ResolveState::Full); + type_sp->SetEncodingType(element_type); + m_ast.SetMetadataAsUserID(clang_type.GetOpaqueQualType(), die.GetID()); + return type_sp; + } + return nullptr; +} + TypeSP DWARFASTParserClang::ParsePointerToMemberType( const DWARFDIE &die, const ParsedDWARFTypeAttributes &attrs) { SymbolFileDWARF *dwarf = die.GetDWARF(); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index b92c397394544..53e7b012592c1 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -170,6 +170,9 @@ class DWARFASTParserClang : public DWARFASTParser { lldb::ModuleSP GetModuleForType(const DWARFDIE &die); private: + // FIXME: attrs should be passed as a const reference. + lldb::TypeSP ParseArrayType(const DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs); lldb::TypeSP ParsePointerToMemberType(const DWARFDIE &die, const ParsedDWARFTypeAttributes &attrs); }; From 94939650b632cd44e518a9adeb16ab82dddd9375 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Tue, 26 Nov 2019 11:54:30 +0100 Subject: [PATCH 025/591] [lldb][NFCI] Extract subroutine parsing from DWARFASTParserClang::ParseTypeFromDWARF Part of the work to split up this monolithic parsing function. Should be NFC but due to the kafkaesque control flow in this case statement this might have some unintended side effects. 
--- .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 756 +++++++++--------- .../SymbolFile/DWARF/DWARFASTParserClang.h | 2 + 2 files changed, 384 insertions(+), 374 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index ea0f02778941c..aca87b3a5b1c4 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -798,428 +798,436 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, case DW_TAG_inlined_subroutine: case DW_TAG_subprogram: case DW_TAG_subroutine_type: { - bool is_variadic = false; - bool is_static = false; - bool has_template_params = false; + type_sp = ParseSubroutine(die, attrs); + break; + } + case DW_TAG_array_type: { + type_sp = ParseArrayType(die, attrs); + break; + } + case DW_TAG_ptr_to_member_type: { + type_sp = ParsePointerToMemberType(die, attrs); + break; + } + default: + dwarf->GetObjectFile()->GetModule()->ReportError( + "{0x%8.8x}: unhandled type tag 0x%4.4x (%s), please file a bug and " + "attach the file at the start of this error message", + die.GetOffset(), tag, DW_TAG_value_to_name(tag)); + break; + } - unsigned type_quals = 0; + // TODO: We should consider making the switch above exhaustive to simplify + // control flow in ParseTypeFromDWARF. Then, we could simply replace this + // return statement with a call to llvm_unreachable. 
+ return UpdateSymbolContextScopeForType(sc, die, type_sp); +} - std::string object_pointer_name; - if (attrs.object_pointer) { - const char *object_pointer_name_cstr = attrs.object_pointer.GetName(); - if (object_pointer_name_cstr) - object_pointer_name = object_pointer_name_cstr; - } +TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs) { + Log *log(LogChannelDWARF::GetLogIfAny(DWARF_LOG_TYPE_COMPLETION | + DWARF_LOG_LOOKUPS)); - DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(), - DW_TAG_value_to_name(tag), type_name_cstr); + SymbolFileDWARF *dwarf = die.GetDWARF(); + const dw_tag_t tag = die.Tag(); - CompilerType return_clang_type; - Type *func_type = NULL; - - if (attrs.type.IsValid()) - func_type = dwarf->ResolveTypeUID(attrs.type.Reference(), true); - - if (func_type) - return_clang_type = func_type->GetForwardCompilerType(); - else - return_clang_type = m_ast.GetBasicType(eBasicTypeVoid); - - std::vector function_param_types; - std::vector function_param_decls; - - // Parse the function children for the parameters - - DWARFDIE decl_ctx_die; - clang::DeclContext *containing_decl_ctx = - GetClangDeclContextContainingDIE(die, &decl_ctx_die); - const clang::Decl::Kind containing_decl_kind = - containing_decl_ctx->getDeclKind(); - - bool is_cxx_method = DeclKindIsCXXClass(containing_decl_kind); - // Start off static. This will be set to false in - // ParseChildParameters(...) if we find a "this" parameters as the - // first parameter - if (is_cxx_method) { - is_static = true; - } - - if (die.HasChildren()) { - bool skip_artificial = true; - ParseChildParameters(containing_decl_ctx, die, skip_artificial, is_static, - is_variadic, has_template_params, - function_param_types, function_param_decls, - type_quals); - } - - bool ignore_containing_context = false; - // Check for templatized class member functions. 
If we had any - // DW_TAG_template_type_parameter or DW_TAG_template_value_parameter - // the DW_TAG_subprogram DIE, then we can't let this become a method in - // a class. Why? Because templatized functions are only emitted if one - // of the templatized methods is used in the current compile unit and - // we will end up with classes that may or may not include these member - // functions and this means one class won't match another class - // definition and it affects our ability to use a class in the clang - // expression parser. So for the greater good, we currently must not - // allow any template member functions in a class definition. - if (is_cxx_method && has_template_params) { - ignore_containing_context = true; - is_cxx_method = false; - } - - // clang_type will get the function prototype clang type after this - // call - clang_type = m_ast.CreateFunctionType( - return_clang_type, function_param_types.data(), - function_param_types.size(), is_variadic, type_quals); - - if (attrs.name) { - bool type_handled = false; - if (tag == DW_TAG_subprogram || tag == DW_TAG_inlined_subroutine) { - ObjCLanguage::MethodName objc_method(attrs.name.GetStringRef(), true); - if (objc_method.IsValid(true)) { - CompilerType class_opaque_type; - ConstString class_name(objc_method.GetClassName()); - if (class_name) { - TypeSP complete_objc_class_type_sp( - dwarf->FindCompleteObjCDefinitionTypeForDIE(DWARFDIE(), - class_name, false)); - - if (complete_objc_class_type_sp) { - CompilerType type_clang_forward_type = - complete_objc_class_type_sp->GetForwardCompilerType(); - if (ClangASTContext::IsObjCObjectOrInterfaceType( - type_clang_forward_type)) - class_opaque_type = type_clang_forward_type; - } + bool is_variadic = false; + bool is_static = false; + bool has_template_params = false; + + unsigned type_quals = 0; + + std::string object_pointer_name; + if (attrs.object_pointer) { + const char *object_pointer_name_cstr = attrs.object_pointer.GetName(); + if 
(object_pointer_name_cstr) + object_pointer_name = object_pointer_name_cstr; + } + + DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(), + DW_TAG_value_to_name(tag), type_name_cstr); + + CompilerType return_clang_type; + Type *func_type = NULL; + + if (attrs.type.IsValid()) + func_type = dwarf->ResolveTypeUID(attrs.type.Reference(), true); + + if (func_type) + return_clang_type = func_type->GetForwardCompilerType(); + else + return_clang_type = m_ast.GetBasicType(eBasicTypeVoid); + + std::vector function_param_types; + std::vector function_param_decls; + + // Parse the function children for the parameters + + DWARFDIE decl_ctx_die; + clang::DeclContext *containing_decl_ctx = + GetClangDeclContextContainingDIE(die, &decl_ctx_die); + const clang::Decl::Kind containing_decl_kind = + containing_decl_ctx->getDeclKind(); + + bool is_cxx_method = DeclKindIsCXXClass(containing_decl_kind); + // Start off static. This will be set to false in + // ParseChildParameters(...) if we find a "this" parameters as the + // first parameter + if (is_cxx_method) { + is_static = true; + } + + if (die.HasChildren()) { + bool skip_artificial = true; + ParseChildParameters(containing_decl_ctx, die, skip_artificial, is_static, + is_variadic, has_template_params, + function_param_types, function_param_decls, + type_quals); + } + + bool ignore_containing_context = false; + // Check for templatized class member functions. If we had any + // DW_TAG_template_type_parameter or DW_TAG_template_value_parameter + // the DW_TAG_subprogram DIE, then we can't let this become a method in + // a class. Why? Because templatized functions are only emitted if one + // of the templatized methods is used in the current compile unit and + // we will end up with classes that may or may not include these member + // functions and this means one class won't match another class + // definition and it affects our ability to use a class in the clang + // expression parser. 
So for the greater good, we currently must not + // allow any template member functions in a class definition. + if (is_cxx_method && has_template_params) { + ignore_containing_context = true; + is_cxx_method = false; + } + + // clang_type will get the function prototype clang type after this + // call + CompilerType clang_type = m_ast.CreateFunctionType( + return_clang_type, function_param_types.data(), + function_param_types.size(), is_variadic, type_quals); + + if (attrs.name) { + bool type_handled = false; + if (tag == DW_TAG_subprogram || tag == DW_TAG_inlined_subroutine) { + ObjCLanguage::MethodName objc_method(attrs.name.GetStringRef(), true); + if (objc_method.IsValid(true)) { + CompilerType class_opaque_type; + ConstString class_name(objc_method.GetClassName()); + if (class_name) { + TypeSP complete_objc_class_type_sp( + dwarf->FindCompleteObjCDefinitionTypeForDIE(DWARFDIE(), + class_name, false)); + + if (complete_objc_class_type_sp) { + CompilerType type_clang_forward_type = + complete_objc_class_type_sp->GetForwardCompilerType(); + if (ClangASTContext::IsObjCObjectOrInterfaceType( + type_clang_forward_type)) + class_opaque_type = type_clang_forward_type; } + } - if (class_opaque_type) { - // If accessibility isn't set to anything valid, assume public - // for now... 
- if (attrs.accessibility == eAccessNone) - attrs.accessibility = eAccessPublic; - - clang::ObjCMethodDecl *objc_method_decl = - m_ast.AddMethodToObjCObjectType( - class_opaque_type, attrs.name.GetCString(), clang_type, - attrs.accessibility, attrs.is_artificial, is_variadic); - type_handled = objc_method_decl != NULL; - if (type_handled) { - LinkDeclContextToDIE(objc_method_decl, die); - m_ast.SetMetadataAsUserID(objc_method_decl, die.GetID()); - } else { - dwarf->GetObjectFile()->GetModule()->ReportError( - "{0x%8.8x}: invalid Objective-C method 0x%4.4x (%s), " - "please file a bug and attach the file at the start of " - "this error message", - die.GetOffset(), tag, DW_TAG_value_to_name(tag)); - } + if (class_opaque_type) { + // If accessibility isn't set to anything valid, assume public + // for now... + if (attrs.accessibility == eAccessNone) + attrs.accessibility = eAccessPublic; + + clang::ObjCMethodDecl *objc_method_decl = + m_ast.AddMethodToObjCObjectType( + class_opaque_type, attrs.name.GetCString(), clang_type, + attrs.accessibility, attrs.is_artificial, is_variadic); + type_handled = objc_method_decl != NULL; + if (type_handled) { + LinkDeclContextToDIE(objc_method_decl, die); + m_ast.SetMetadataAsUserID(objc_method_decl, die.GetID()); + } else { + dwarf->GetObjectFile()->GetModule()->ReportError( + "{0x%8.8x}: invalid Objective-C method 0x%4.4x (%s), " + "please file a bug and attach the file at the start of " + "this error message", + die.GetOffset(), tag, DW_TAG_value_to_name(tag)); } - } else if (is_cxx_method) { - // Look at the parent of this DIE and see if is is a class or - // struct and see if this is actually a C++ method - Type *class_type = dwarf->ResolveType(decl_ctx_die); - if (class_type) { - bool alternate_defn = false; - if (class_type->GetID() != decl_ctx_die.GetID() || - IsClangModuleFwdDecl(decl_ctx_die)) { - alternate_defn = true; - - // We uniqued the parent class of this function to another - // class so we now need to associate 
all dies under - // "decl_ctx_die" to DIEs in the DIE for "class_type"... - DWARFDIE class_type_die = dwarf->GetDIE(class_type->GetID()); - - if (class_type_die) { - std::vector failures; - - CopyUniqueClassMethodTypes(decl_ctx_die, class_type_die, - class_type, failures); - - // FIXME do something with these failures that's - // smarter than just dropping them on the ground. - // Unfortunately classes don't like having stuff added - // to them after their definitions are complete... - - type_ptr = dwarf->GetDIEToType()[die.GetDIE()]; - if (type_ptr && type_ptr != DIE_IS_BEING_PARSED) { - type_sp = type_ptr->shared_from_this(); - break; - } + } + } else if (is_cxx_method) { + // Look at the parent of this DIE and see if is is a class or + // struct and see if this is actually a C++ method + Type *class_type = dwarf->ResolveType(decl_ctx_die); + if (class_type) { + bool alternate_defn = false; + if (class_type->GetID() != decl_ctx_die.GetID() || + IsClangModuleFwdDecl(decl_ctx_die)) { + alternate_defn = true; + + // We uniqued the parent class of this function to another + // class so we now need to associate all dies under + // "decl_ctx_die" to DIEs in the DIE for "class_type"... + DWARFDIE class_type_die = dwarf->GetDIE(class_type->GetID()); + + if (class_type_die) { + std::vector failures; + + CopyUniqueClassMethodTypes(decl_ctx_die, class_type_die, + class_type, failures); + + // FIXME do something with these failures that's + // smarter than just dropping them on the ground. + // Unfortunately classes don't like having stuff added + // to them after their definitions are complete... 
+ + Type *type_ptr = dwarf->GetDIEToType()[die.GetDIE()]; + if (type_ptr && type_ptr != DIE_IS_BEING_PARSED) { + return type_ptr->shared_from_this(); } } + } - if (attrs.specification.IsValid()) { - // We have a specification which we are going to base our - // function prototype off of, so we need this type to be - // completed so that the m_die_to_decl_ctx for the method in - // the specification has a valid clang decl context. - class_type->GetForwardCompilerType(); - // If we have a specification, then the function type should - // have been made with the specification and not with this - // die. - DWARFDIE spec_die = attrs.specification.Reference(); - clang::DeclContext *spec_clang_decl_ctx = - GetClangDeclContextForDIE(spec_die); - if (spec_clang_decl_ctx) { - LinkDeclContextToDIE(spec_clang_decl_ctx, die); - } else { - dwarf->GetObjectFile()->GetModule()->ReportWarning( - "0x%8.8" PRIx64 ": DW_AT_specification(0x%8.8x" - ") has no decl\n", - die.GetID(), spec_die.GetOffset()); - } - type_handled = true; - } else if (attrs.abstract_origin.IsValid()) { - // We have a specification which we are going to base our - // function prototype off of, so we need this type to be - // completed so that the m_die_to_decl_ctx for the method in - // the abstract origin has a valid clang decl context. 
- class_type->GetForwardCompilerType(); - - DWARFDIE abs_die = attrs.abstract_origin.Reference(); - clang::DeclContext *abs_clang_decl_ctx = - GetClangDeclContextForDIE(abs_die); - if (abs_clang_decl_ctx) { - LinkDeclContextToDIE(abs_clang_decl_ctx, die); - } else { - dwarf->GetObjectFile()->GetModule()->ReportWarning( - "0x%8.8" PRIx64 ": DW_AT_abstract_origin(0x%8.8x" - ") has no decl\n", - die.GetID(), abs_die.GetOffset()); - } - type_handled = true; + if (attrs.specification.IsValid()) { + // We have a specification which we are going to base our + // function prototype off of, so we need this type to be + // completed so that the m_die_to_decl_ctx for the method in + // the specification has a valid clang decl context. + class_type->GetForwardCompilerType(); + // If we have a specification, then the function type should + // have been made with the specification and not with this + // die. + DWARFDIE spec_die = attrs.specification.Reference(); + clang::DeclContext *spec_clang_decl_ctx = + GetClangDeclContextForDIE(spec_die); + if (spec_clang_decl_ctx) { + LinkDeclContextToDIE(spec_clang_decl_ctx, die); } else { - CompilerType class_opaque_type = - class_type->GetForwardCompilerType(); - if (ClangASTContext::IsCXXClassType(class_opaque_type)) { - if (class_opaque_type.IsBeingDefined() || alternate_defn) { - if (!is_static && !die.HasChildren()) { - // We have a C++ member function with no children (this - // pointer!) and clang will get mad if we try and make - // a function that isn't well formed in the DWARF, so - // we will just skip it... - type_handled = true; - } else { - bool add_method = true; - if (alternate_defn) { - // If an alternate definition for the class exists, - // then add the method only if an equivalent is not - // already present. 
- clang::CXXRecordDecl *record_decl = - m_ast.GetAsCXXRecordDecl( - class_opaque_type.GetOpaqueQualType()); - if (record_decl) { - for (auto method_iter = record_decl->method_begin(); - method_iter != record_decl->method_end(); - method_iter++) { - clang::CXXMethodDecl *method_decl = *method_iter; - if (method_decl->getNameInfo().getAsString() == - attrs.name.GetStringRef()) { - if (method_decl->getType() == - ClangUtil::GetQualType(clang_type)) { - add_method = false; - LinkDeclContextToDIE(method_decl, die); - type_handled = true; - - break; - } + dwarf->GetObjectFile()->GetModule()->ReportWarning( + "0x%8.8" PRIx64 ": DW_AT_specification(0x%8.8x" + ") has no decl\n", + die.GetID(), spec_die.GetOffset()); + } + type_handled = true; + } else if (attrs.abstract_origin.IsValid()) { + // We have a specification which we are going to base our + // function prototype off of, so we need this type to be + // completed so that the m_die_to_decl_ctx for the method in + // the abstract origin has a valid clang decl context. + class_type->GetForwardCompilerType(); + + DWARFDIE abs_die = attrs.abstract_origin.Reference(); + clang::DeclContext *abs_clang_decl_ctx = + GetClangDeclContextForDIE(abs_die); + if (abs_clang_decl_ctx) { + LinkDeclContextToDIE(abs_clang_decl_ctx, die); + } else { + dwarf->GetObjectFile()->GetModule()->ReportWarning( + "0x%8.8" PRIx64 ": DW_AT_abstract_origin(0x%8.8x" + ") has no decl\n", + die.GetID(), abs_die.GetOffset()); + } + type_handled = true; + } else { + CompilerType class_opaque_type = + class_type->GetForwardCompilerType(); + if (ClangASTContext::IsCXXClassType(class_opaque_type)) { + if (class_opaque_type.IsBeingDefined() || alternate_defn) { + if (!is_static && !die.HasChildren()) { + // We have a C++ member function with no children (this + // pointer!) and clang will get mad if we try and make + // a function that isn't well formed in the DWARF, so + // we will just skip it... 
+ type_handled = true; + } else { + bool add_method = true; + if (alternate_defn) { + // If an alternate definition for the class exists, + // then add the method only if an equivalent is not + // already present. + clang::CXXRecordDecl *record_decl = + m_ast.GetAsCXXRecordDecl( + class_opaque_type.GetOpaqueQualType()); + if (record_decl) { + for (auto method_iter = record_decl->method_begin(); + method_iter != record_decl->method_end(); + method_iter++) { + clang::CXXMethodDecl *method_decl = *method_iter; + if (method_decl->getNameInfo().getAsString() == + attrs.name.GetStringRef()) { + if (method_decl->getType() == + ClangUtil::GetQualType(clang_type)) { + add_method = false; + LinkDeclContextToDIE(method_decl, die); + type_handled = true; + + break; } } } } + } - if (add_method) { - llvm::PrettyStackTraceFormat stack_trace( - "SymbolFileDWARF::ParseType() is adding a method " - "%s to class %s in DIE 0x%8.8" PRIx64 " from %s", - attrs.name.GetCString(), - class_type->GetName().GetCString(), die.GetID(), - dwarf->GetObjectFile() - ->GetFileSpec() - .GetPath() - .c_str()); - - const bool is_attr_used = false; - // Neither GCC 4.2 nor clang++ currently set a valid - // accessibility in the DWARF for C++ methods... - // Default to public for now... - if (attrs.accessibility == eAccessNone) - attrs.accessibility = eAccessPublic; - - clang::CXXMethodDecl *cxx_method_decl = - m_ast.AddMethodToCXXRecordType( - class_opaque_type.GetOpaqueQualType(), - attrs.name.GetCString(), attrs.mangled_name, - clang_type, attrs.accessibility, attrs.is_virtual, - is_static, attrs.is_inline, attrs.is_explicit, - is_attr_used, attrs.is_artificial); - - type_handled = cxx_method_decl != NULL; - // Artificial methods are always handled even when we - // don't create a new declaration for them. 
- type_handled |= attrs.is_artificial; - - if (cxx_method_decl) { - LinkDeclContextToDIE(cxx_method_decl, die); - - ClangASTMetadata metadata; - metadata.SetUserID(die.GetID()); - - if (!object_pointer_name.empty()) { - metadata.SetObjectPtrName( - object_pointer_name.c_str()); - LLDB_LOGF(log, - "Setting object pointer name: %s on method " - "object %p.\n", - object_pointer_name.c_str(), - static_cast(cxx_method_decl)); - } - m_ast.SetMetadata(cxx_method_decl, metadata); - } else { - ignore_containing_context = true; + if (add_method) { + llvm::PrettyStackTraceFormat stack_trace( + "SymbolFileDWARF::ParseType() is adding a method " + "%s to class %s in DIE 0x%8.8" PRIx64 " from %s", + attrs.name.GetCString(), + class_type->GetName().GetCString(), die.GetID(), + dwarf->GetObjectFile() + ->GetFileSpec() + .GetPath() + .c_str()); + + const bool is_attr_used = false; + // Neither GCC 4.2 nor clang++ currently set a valid + // accessibility in the DWARF for C++ methods... + // Default to public for now... + if (attrs.accessibility == eAccessNone) + attrs.accessibility = eAccessPublic; + + clang::CXXMethodDecl *cxx_method_decl = + m_ast.AddMethodToCXXRecordType( + class_opaque_type.GetOpaqueQualType(), + attrs.name.GetCString(), attrs.mangled_name, + clang_type, attrs.accessibility, attrs.is_virtual, + is_static, attrs.is_inline, attrs.is_explicit, + is_attr_used, attrs.is_artificial); + + type_handled = cxx_method_decl != NULL; + // Artificial methods are always handled even when we + // don't create a new declaration for them. 
+ type_handled |= attrs.is_artificial; + + if (cxx_method_decl) { + LinkDeclContextToDIE(cxx_method_decl, die); + + ClangASTMetadata metadata; + metadata.SetUserID(die.GetID()); + + if (!object_pointer_name.empty()) { + metadata.SetObjectPtrName( + object_pointer_name.c_str()); + LLDB_LOGF(log, + "Setting object pointer name: %s on method " + "object %p.\n", + object_pointer_name.c_str(), + static_cast(cxx_method_decl)); } + m_ast.SetMetadata(cxx_method_decl, metadata); + } else { + ignore_containing_context = true; } } - } else { - // We were asked to parse the type for a method in a - // class, yet the class hasn't been asked to complete - // itself through the clang::ExternalASTSource protocol, - // so we need to just have the class complete itself and - // do things the right way, then our - // DIE should then have an entry in the - // dwarf->GetDIEToType() map. First - // we need to modify the dwarf->GetDIEToType() so it - // doesn't think we are trying to parse this DIE - // anymore... - dwarf->GetDIEToType()[die.GetDIE()] = NULL; - - // Now we get the full type to force our class type to - // complete itself using the clang::ExternalASTSource - // protocol which will parse all base classes and all - // methods (including the method for this DIE). - class_type->GetFullCompilerType(); - - // The type for this DIE should have been filled in the - // function call above - type_ptr = dwarf->GetDIEToType()[die.GetDIE()]; - if (type_ptr && type_ptr != DIE_IS_BEING_PARSED) { - type_sp = type_ptr->shared_from_this(); - break; - } - - // FIXME This is fixing some even uglier behavior but we - // really need to - // uniq the methods of each class as well as the class - // itself. 
- type_handled = true; } + } else { + // We were asked to parse the type for a method in a + // class, yet the class hasn't been asked to complete + // itself through the clang::ExternalASTSource protocol, + // so we need to just have the class complete itself and + // do things the right way, then our + // DIE should then have an entry in the + // dwarf->GetDIEToType() map. First + // we need to modify the dwarf->GetDIEToType() so it + // doesn't think we are trying to parse this DIE + // anymore... + dwarf->GetDIEToType()[die.GetDIE()] = NULL; + + // Now we get the full type to force our class type to + // complete itself using the clang::ExternalASTSource + // protocol which will parse all base classes and all + // methods (including the method for this DIE). + class_type->GetFullCompilerType(); + + // The type for this DIE should have been filled in the + // function call above + Type *type_ptr = dwarf->GetDIEToType()[die.GetDIE()]; + if (type_ptr && type_ptr != DIE_IS_BEING_PARSED) { + return type_ptr->shared_from_this(); + } + + // FIXME This is fixing some even uglier behavior but we + // really need to + // uniq the methods of each class as well as the class + // itself. 
+ type_handled = true; } } } } } + } - if (!type_handled) { - clang::FunctionDecl *function_decl = nullptr; - clang::FunctionDecl *template_function_decl = nullptr; + if (!type_handled) { + clang::FunctionDecl *function_decl = nullptr; + clang::FunctionDecl *template_function_decl = nullptr; - if (attrs.abstract_origin.IsValid()) { - DWARFDIE abs_die = attrs.abstract_origin.Reference(); + if (attrs.abstract_origin.IsValid()) { + DWARFDIE abs_die = attrs.abstract_origin.Reference(); - if (dwarf->ResolveType(abs_die)) { - function_decl = llvm::dyn_cast_or_null( - GetCachedClangDeclContextForDIE(abs_die)); + if (dwarf->ResolveType(abs_die)) { + function_decl = llvm::dyn_cast_or_null( + GetCachedClangDeclContextForDIE(abs_die)); - if (function_decl) { - LinkDeclContextToDIE(function_decl, die); - } + if (function_decl) { + LinkDeclContextToDIE(function_decl, die); } } + } - if (!function_decl) { - // We just have a function that isn't part of a class - function_decl = m_ast.CreateFunctionDeclaration( + if (!function_decl) { + // We just have a function that isn't part of a class + function_decl = m_ast.CreateFunctionDeclaration( + ignore_containing_context ? m_ast.GetTranslationUnitDecl() + : containing_decl_ctx, + attrs.name.GetCString(), clang_type, attrs.storage, + attrs.is_inline); + + if (has_template_params) { + ClangASTContext::TemplateParameterInfos template_param_infos; + ParseTemplateParameterInfos(die, template_param_infos); + template_function_decl = m_ast.CreateFunctionDeclaration( ignore_containing_context ? 
m_ast.GetTranslationUnitDecl() : containing_decl_ctx, attrs.name.GetCString(), clang_type, attrs.storage, attrs.is_inline); + clang::FunctionTemplateDecl *func_template_decl = + m_ast.CreateFunctionTemplateDecl( + containing_decl_ctx, template_function_decl, + attrs.name.GetCString(), template_param_infos); + m_ast.CreateFunctionTemplateSpecializationInfo( + function_decl, func_template_decl, template_param_infos); + } - if (has_template_params) { - ClangASTContext::TemplateParameterInfos template_param_infos; - ParseTemplateParameterInfos(die, template_param_infos); - template_function_decl = m_ast.CreateFunctionDeclaration( - ignore_containing_context ? m_ast.GetTranslationUnitDecl() - : containing_decl_ctx, - attrs.name.GetCString(), clang_type, attrs.storage, - attrs.is_inline); - clang::FunctionTemplateDecl *func_template_decl = - m_ast.CreateFunctionTemplateDecl( - containing_decl_ctx, template_function_decl, - attrs.name.GetCString(), template_param_infos); - m_ast.CreateFunctionTemplateSpecializationInfo( - function_decl, func_template_decl, template_param_infos); - } - - lldbassert(function_decl); + lldbassert(function_decl); - if (function_decl) { - LinkDeclContextToDIE(function_decl, die); + if (function_decl) { + LinkDeclContextToDIE(function_decl, die); - if (!function_param_decls.empty()) { - m_ast.SetFunctionParameters(function_decl, + if (!function_param_decls.empty()) { + m_ast.SetFunctionParameters(function_decl, + &function_param_decls.front(), + function_param_decls.size()); + if (template_function_decl) + m_ast.SetFunctionParameters(template_function_decl, &function_param_decls.front(), function_param_decls.size()); - if (template_function_decl) - m_ast.SetFunctionParameters(template_function_decl, - &function_param_decls.front(), - function_param_decls.size()); - } + } - ClangASTMetadata metadata; - metadata.SetUserID(die.GetID()); + ClangASTMetadata metadata; + metadata.SetUserID(die.GetID()); - if (!object_pointer_name.empty()) { - 
metadata.SetObjectPtrName(object_pointer_name.c_str()); - LLDB_LOGF(log, - "Setting object pointer name: %s on function " - "object %p.", - object_pointer_name.c_str(), - static_cast(function_decl)); - } - m_ast.SetMetadata(function_decl, metadata); + if (!object_pointer_name.empty()) { + metadata.SetObjectPtrName(object_pointer_name.c_str()); + LLDB_LOGF(log, + "Setting object pointer name: %s on function " + "object %p.", + object_pointer_name.c_str(), + static_cast(function_decl)); } + m_ast.SetMetadata(function_decl, metadata); } } } - type_sp = std::make_shared( - die.GetID(), dwarf, attrs.name, llvm::None, nullptr, LLDB_INVALID_UID, - Type::eEncodingIsUID, &attrs.decl, clang_type, Type::ResolveState::Full); - assert(type_sp.get()); - } break; - - case DW_TAG_array_type: { - type_sp = ParseArrayType(die, attrs); - break; - } - case DW_TAG_ptr_to_member_type: { - type_sp = ParsePointerToMemberType(die, attrs); - break; } - default: - dwarf->GetObjectFile()->GetModule()->ReportError( - "{0x%8.8x}: unhandled type tag 0x%4.4x (%s), please file a bug and " - "attach the file at the start of this error message", - die.GetOffset(), tag, DW_TAG_value_to_name(tag)); - break; - } - - // TODO: We should consider making the switch above exhaustive to simplify - // control flow in ParseTypeFromDWARF. Then, we could simply replace this - // return statement with a call to llvm_unreachable. 
- return UpdateSymbolContextScopeForType(sc, die, type_sp); + return std::make_shared( + die.GetID(), dwarf, attrs.name, llvm::None, nullptr, LLDB_INVALID_UID, + Type::eEncodingIsUID, &attrs.decl, clang_type, Type::ResolveState::Full); } TypeSP DWARFASTParserClang::ParseArrayType(const DWARFDIE &die, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index 53e7b012592c1..ef15590f2654b 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -170,6 +170,8 @@ class DWARFASTParserClang : public DWARFASTParser { lldb::ModuleSP GetModuleForType(const DWARFDIE &die); private: + lldb::TypeSP ParseSubroutine(const DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs); // FIXME: attrs should be passed as a const reference. lldb::TypeSP ParseArrayType(const DWARFDIE &die, ParsedDWARFTypeAttributes &attrs); From 8f2b57d257e87b0244f9883cd8075898005ba757 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Tue, 26 Nov 2019 12:21:31 +0100 Subject: [PATCH 026/591] [lldb][NFC] Extract enum parsing from DWARFASTParserClang::ParseTypeFromDWARF Part of the work to split up this monolithic parsing function. 
--- .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 211 +++++++++--------- .../SymbolFile/DWARF/DWARFASTParserClang.h | 2 + 2 files changed, 113 insertions(+), 100 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index aca87b3a5b1c4..17e0924e3e588 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -694,106 +694,9 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, } case DW_TAG_enumeration_type: { - if (attrs.is_forward_declaration) { - type_sp = ParseTypeFromClangModule(sc, die, log); - if (type_sp) - return type_sp; - - DWARFDeclContext die_decl_ctx; - die.GetDWARFDeclContext(die_decl_ctx); - - type_sp = dwarf->FindDefinitionTypeForDWARFDeclContext(die_decl_ctx); - - if (!type_sp) { - SymbolFileDWARFDebugMap *debug_map_symfile = - dwarf->GetDebugMapSymfile(); - if (debug_map_symfile) { - // We weren't able to find a full declaration in this DWARF, - // see if we have a declaration anywhere else... 
- type_sp = debug_map_symfile->FindDefinitionTypeForDWARFDeclContext( - die_decl_ctx); - } - } - - if (type_sp) { - if (log) { - dwarf->GetObjectFile()->GetModule()->LogMessage( - log, - "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" is a " - "forward declaration, complete type is 0x%8.8" PRIx64, - static_cast(this), die.GetOffset(), - DW_TAG_value_to_name(tag), attrs.name.GetCString(), - type_sp->GetID()); - } - - // We found a real definition for this type elsewhere so lets use - // it and cache the fact that we found a complete type for this - // die - dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); - clang::DeclContext *defn_decl_ctx = - GetCachedClangDeclContextForDIE(dwarf->GetDIE(type_sp->GetID())); - if (defn_decl_ctx) - LinkDeclContextToDIE(defn_decl_ctx, die); - return type_sp; - } - } - DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(), - DW_TAG_value_to_name(tag), type_name_cstr); - - CompilerType enumerator_clang_type; - clang_type.SetCompilerType( - &m_ast, dwarf->GetForwardDeclDieToClangType().lookup(die.GetDIE())); - if (!clang_type) { - if (attrs.type.IsValid()) { - Type *enumerator_type = - dwarf->ResolveTypeUID(attrs.type.Reference(), true); - if (enumerator_type) - enumerator_clang_type = enumerator_type->GetFullCompilerType(); - } - - if (!enumerator_clang_type) { - if (attrs.byte_size) { - enumerator_clang_type = - m_ast.GetBuiltinTypeForDWARFEncodingAndBitSize( - NULL, DW_ATE_signed, *attrs.byte_size * 8); - } else { - enumerator_clang_type = m_ast.GetBasicType(eBasicTypeInt); - } - } - - clang_type = m_ast.CreateEnumerationType( - attrs.name.GetCString(), - GetClangDeclContextContainingDIE(die, nullptr), attrs.decl, - enumerator_clang_type, attrs.is_scoped_enum); - } else { - enumerator_clang_type = - m_ast.GetEnumerationIntegerType(clang_type.GetOpaqueQualType()); - } - - LinkDeclContextToDIE(ClangASTContext::GetDeclContextForType(clang_type), - die); - - type_sp = std::make_shared( - die.GetID(), dwarf, attrs.name, 
attrs.byte_size, nullptr, - dwarf->GetUID(attrs.type.Reference()), Type::eEncodingIsUID, - &attrs.decl, clang_type, Type::ResolveState::Forward); - - if (ClangASTContext::StartTagDeclarationDefinition(clang_type)) { - if (die.HasChildren()) { - bool is_signed = false; - enumerator_clang_type.IsIntegerType(is_signed); - ParseChildEnumerators(clang_type, is_signed, - type_sp->GetByteSize().getValueOr(0), die); - } - ClangASTContext::CompleteTagDeclarationDefinition(clang_type); - } else { - dwarf->GetObjectFile()->GetModule()->ReportError( - "DWARF DIE at 0x%8.8x named \"%s\" was not able to start its " - "definition.\nPlease file a bug and attach the file at the " - "start of this error message", - die.GetOffset(), attrs.name.GetCString()); - } - } break; + type_sp = ParseEnum(sc, die, attrs); + break; + } case DW_TAG_inlined_subroutine: case DW_TAG_subprogram: @@ -823,6 +726,114 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, return UpdateSymbolContextScopeForType(sc, die, type_sp); } +TypeSP DWARFASTParserClang::ParseEnum(const SymbolContext &sc, + const DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs) { + Log *log(LogChannelDWARF::GetLogIfAny(DWARF_LOG_TYPE_COMPLETION | + DWARF_LOG_LOOKUPS)); + SymbolFileDWARF *dwarf = die.GetDWARF(); + const dw_tag_t tag = die.Tag(); + TypeSP type_sp; + + if (attrs.is_forward_declaration) { + type_sp = ParseTypeFromClangModule(sc, die, log); + if (type_sp) + return type_sp; + + DWARFDeclContext die_decl_ctx; + die.GetDWARFDeclContext(die_decl_ctx); + + type_sp = dwarf->FindDefinitionTypeForDWARFDeclContext(die_decl_ctx); + + if (!type_sp) { + SymbolFileDWARFDebugMap *debug_map_symfile = dwarf->GetDebugMapSymfile(); + if (debug_map_symfile) { + // We weren't able to find a full declaration in this DWARF, + // see if we have a declaration anywhere else... 
+ type_sp = debug_map_symfile->FindDefinitionTypeForDWARFDeclContext( + die_decl_ctx); + } + } + + if (type_sp) { + if (log) { + dwarf->GetObjectFile()->GetModule()->LogMessage( + log, + "SymbolFileDWARF(%p) - 0x%8.8x: %s type \"%s\" is a " + "forward declaration, complete type is 0x%8.8" PRIx64, + static_cast(this), die.GetOffset(), + DW_TAG_value_to_name(tag), attrs.name.GetCString(), + type_sp->GetID()); + } + + // We found a real definition for this type elsewhere so lets use + // it and cache the fact that we found a complete type for this + // die + dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); + clang::DeclContext *defn_decl_ctx = + GetCachedClangDeclContextForDIE(dwarf->GetDIE(type_sp->GetID())); + if (defn_decl_ctx) + LinkDeclContextToDIE(defn_decl_ctx, die); + return type_sp; + } + } + DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\")\n", die.GetID(), + DW_TAG_value_to_name(tag), type_name_cstr); + + CompilerType enumerator_clang_type; + CompilerType clang_type; + clang_type.SetCompilerType( + &m_ast, dwarf->GetForwardDeclDieToClangType().lookup(die.GetDIE())); + if (!clang_type) { + if (attrs.type.IsValid()) { + Type *enumerator_type = + dwarf->ResolveTypeUID(attrs.type.Reference(), true); + if (enumerator_type) + enumerator_clang_type = enumerator_type->GetFullCompilerType(); + } + + if (!enumerator_clang_type) { + if (attrs.byte_size) { + enumerator_clang_type = m_ast.GetBuiltinTypeForDWARFEncodingAndBitSize( + NULL, DW_ATE_signed, *attrs.byte_size * 8); + } else { + enumerator_clang_type = m_ast.GetBasicType(eBasicTypeInt); + } + } + + clang_type = m_ast.CreateEnumerationType( + attrs.name.GetCString(), GetClangDeclContextContainingDIE(die, nullptr), + attrs.decl, enumerator_clang_type, attrs.is_scoped_enum); + } else { + enumerator_clang_type = + m_ast.GetEnumerationIntegerType(clang_type.GetOpaqueQualType()); + } + + LinkDeclContextToDIE(ClangASTContext::GetDeclContextForType(clang_type), die); + + type_sp = std::make_shared( + die.GetID(), dwarf, 
attrs.name, attrs.byte_size, nullptr, + dwarf->GetUID(attrs.type.Reference()), Type::eEncodingIsUID, &attrs.decl, + clang_type, Type::ResolveState::Forward); + + if (ClangASTContext::StartTagDeclarationDefinition(clang_type)) { + if (die.HasChildren()) { + bool is_signed = false; + enumerator_clang_type.IsIntegerType(is_signed); + ParseChildEnumerators(clang_type, is_signed, + type_sp->GetByteSize().getValueOr(0), die); + } + ClangASTContext::CompleteTagDeclarationDefinition(clang_type); + } else { + dwarf->GetObjectFile()->GetModule()->ReportError( + "DWARF DIE at 0x%8.8x named \"%s\" was not able to start its " + "definition.\nPlease file a bug and attach the file at the " + "start of this error message", + die.GetOffset(), attrs.name.GetCString()); + } + return type_sp; +} + TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, ParsedDWARFTypeAttributes &attrs) { Log *log(LogChannelDWARF::GetLogIfAny(DWARF_LOG_TYPE_COMPLETION | diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index ef15590f2654b..0bca18ccd5d55 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -170,6 +170,8 @@ class DWARFASTParserClang : public DWARFASTParser { lldb::ModuleSP GetModuleForType(const DWARFDIE &die); private: + lldb::TypeSP ParseEnum(const lldb_private::SymbolContext &sc, + const DWARFDIE &die, ParsedDWARFTypeAttributes &attrs); lldb::TypeSP ParseSubroutine(const DWARFDIE &die, ParsedDWARFTypeAttributes &attrs); // FIXME: attrs should be passed as a const reference. 
From e73f78acd34360f7450b81167d9dc858ccddc262 Mon Sep 17 00:00:00 2001 From: Alexey Lapshin Date: Fri, 15 Nov 2019 21:48:55 +0300 Subject: [PATCH 027/591] [X86][MC] no error diagnostic for out-of-range jrcxz/jecxz/jcxz Fix for PR24072: X86 instructions jrcxz/jecxz/jcxz performs short jumps if rcx/ecx/cx register is 0 The maximum relative offset for a forward short jump is 127 Bytes (0x7F). The maximum relative offset for a backward short jump is 128 Bytes (0x80). Gnu assembler warns when the distance of the jump exceeds the maximum but llvm-as does not. Patch by Konstantin Belochapka and Alexey Lapshin Differential Revision: https://reviews.llvm.org/D70652 --- .../Target/X86/MCTargetDesc/X86AsmBackend.cpp | 27 ++++++++++++++----- llvm/test/MC/MachO/reloc.s | 2 +- llvm/test/MC/X86/x86-jcxz-loop-fixup.s | 26 ++++++++++++++++++ 3 files changed, 48 insertions(+), 7 deletions(-) create mode 100644 llvm/test/MC/X86/x86-jcxz-loop-fixup.s diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index f08fcb575bf00..1ccb9b7cbf748 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -12,6 +12,8 @@ #include "llvm/BinaryFormat/ELF.h" #include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" @@ -22,6 +24,7 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -114,12 +117,24 @@ class X86AsmBackend : public MCAsmBackend { assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!"); - // Check that uppper bits are either all zeros or all ones. 
- // Specifically ignore overflow/underflow as long as the leakage is - // limited to the lower bits. This is to remain compatible with - // other assemblers. - assert((Size == 0 || isIntN(Size * 8 + 1, Value)) && - "Value does not fit in the Fixup field"); + int64_t SignedValue = static_cast(Value); + if ((Target.isAbsolute() || IsResolved) && + getFixupKindInfo(Fixup.getKind()).Flags & + MCFixupKindInfo::FKF_IsPCRel) { + // check that PC relative fixup fits into the fixup size. + if (Size > 0 && !isIntN(Size * 8, SignedValue)) + Asm.getContext().reportError( + Fixup.getLoc(), "value of " + Twine(SignedValue) + + " is too large for field of " + Twine(Size) + + ((Size == 1) ? " byte." : " bytes.")); + } else { + // Check that uppper bits are either all zeros or all ones. + // Specifically ignore overflow/underflow as long as the leakage is + // limited to the lower bits. This is to remain compatible with + // other assemblers. + assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) && + "Value does not fit in the Fixup field"); + } for (unsigned i = 0; i != Size; ++i) Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8)); diff --git a/llvm/test/MC/MachO/reloc.s b/llvm/test/MC/MachO/reloc.s index 1379d80eb310e..bab5d63d27f45 100644 --- a/llvm/test/MC/MachO/reloc.s +++ b/llvm/test/MC/MachO/reloc.s @@ -37,7 +37,7 @@ L0: .text _f0: L1: - jmp 0xbabecafe + jmp 0x7abecafe jmp L0 jmp L1 ret diff --git a/llvm/test/MC/X86/x86-jcxz-loop-fixup.s b/llvm/test/MC/X86/x86-jcxz-loop-fixup.s new file mode 100644 index 0000000000000..219c1bb52eb6b --- /dev/null +++ b/llvm/test/MC/X86/x86-jcxz-loop-fixup.s @@ -0,0 +1,26 @@ +# RUN: not llvm-mc -filetype=obj -triple=x86_64-linux-gnu %s 2>&1 | FileCheck %s + + .balign 128 +label00: +// CHECK: value of 253 is too large for field of 1 byte. + jecxz label01 +// CHECK: value of 251 is too large for field of 1 byte. + jrcxz label01 +// CHECK: value of 249 is too large for field of 1 byte. 
+ loop label01 +// CHECK: value of 247 is too large for field of 1 byte. + loope label01 +// CHECK: value of 245 is too large for field of 1 byte. + loopne label01 + .balign 256 +label01: +// CHECK: value of -259 is too large for field of 1 byte. + jecxz label00 +// CHECK: value of -261 is too large for field of 1 byte. + jrcxz label00 +// CHECK: value of -263 is too large for field of 1 byte. + loop label00 +// CHECK: value of -265 is too large for field of 1 byte. + loope label00 +// CHECK: value of -267 is too large for field of 1 byte. + loopne label00 From 5f8b8d282048a9c535a90ab64bbadf576e348963 Mon Sep 17 00:00:00 2001 From: AndreyChurbanov Date: Tue, 26 Nov 2019 14:37:24 +0300 Subject: [PATCH 028/591] [openmp] Recognise ARMv7ve machine arch. Patch by raj.khem (Khem Raj) Differential Revision: https://reviews.llvm.org/D68543 --- openmp/runtime/src/kmp_platform.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/runtime/src/kmp_platform.h b/openmp/runtime/src/kmp_platform.h index 35e61a9cf3d0c..779c08e9771d5 100644 --- a/openmp/runtime/src/kmp_platform.h +++ b/openmp/runtime/src/kmp_platform.h @@ -143,7 +143,7 @@ #endif #if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7R__) || \ - defined(__ARM_ARCH_7A__) + defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7VE__) #define KMP_ARCH_ARMV7 1 #endif From f69ac55d60d916e295ae0e507c5f4c2655360089 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Tue, 26 Nov 2019 13:59:37 +0300 Subject: [PATCH 029/591] [yaml2obj] - Teach tool to describe SHT_GNU_verdef section with a "Content" property. There is no way to set raw content for SHT_GNU_verdef section. This patch implements it. 
Differential revision: https://reviews.llvm.org/D70710 --- llvm/include/llvm/ObjectYAML/ELFYAML.h | 4 +- llvm/lib/ObjectYAML/ELFEmitter.cpp | 26 ++++-- llvm/lib/ObjectYAML/ELFYAML.cpp | 10 ++- .../tools/yaml2obj/ELF/verdef-section.yaml | 83 ++++++++++++++++++- llvm/tools/obj2yaml/elf2yaml.cpp | 4 +- 5 files changed, 114 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h index a498621a2a13f..9e45efc4a5fec 100644 --- a/llvm/include/llvm/ObjectYAML/ELFYAML.h +++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h @@ -397,7 +397,9 @@ struct VerdefEntry { }; struct VerdefSection : Section { - std::vector Entries; + Optional> Entries; + Optional Content; + llvm::yaml::Hex64 Info; VerdefSection() : Section(ChunkKind::Verdef) {} diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index e8b54a7e60200..069e3c19523b6 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -985,9 +985,19 @@ void ELFState::writeSectionContent(Elf_Shdr &SHeader, raw_ostream &OS = CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + SHeader.sh_info = Section.Info; + + if (Section.Content) { + SHeader.sh_size = writeContent(OS, Section.Content, None); + return; + } + + if (!Section.Entries) + return; + uint64_t AuxCnt = 0; - for (size_t I = 0; I < Section.Entries.size(); ++I) { - const ELFYAML::VerdefEntry &E = Section.Entries[I]; + for (size_t I = 0; I < Section.Entries->size(); ++I) { + const ELFYAML::VerdefEntry &E = (*Section.Entries)[I]; Elf_Verdef VerDef; VerDef.vd_version = E.Version; @@ -996,7 +1006,7 @@ void ELFState::writeSectionContent(Elf_Shdr &SHeader, VerDef.vd_hash = E.Hash; VerDef.vd_aux = sizeof(Elf_Verdef); VerDef.vd_cnt = E.VerNames.size(); - if (I == Section.Entries.size() - 1) + if (I == Section.Entries->size() - 1) VerDef.vd_next = 0; else VerDef.vd_next = @@ -1014,9 +1024,8 @@ void ELFState::writeSectionContent(Elf_Shdr &SHeader, 
} } - SHeader.sh_size = Section.Entries.size() * sizeof(Elf_Verdef) + + SHeader.sh_size = Section.Entries->size() * sizeof(Elf_Verdef) + AuxCnt * sizeof(Elf_Verdaux); - SHeader.sh_info = Section.Info; } template @@ -1341,9 +1350,10 @@ template void ELFState::finalizeStrings() { DotDynstr.add(Aux.Name); } } else if (auto VerDef = dyn_cast(Sec)) { - for (const ELFYAML::VerdefEntry &E : VerDef->Entries) - for (StringRef Name : E.VerNames) - DotDynstr.add(Name); + if (VerDef->Entries) + for (const ELFYAML::VerdefEntry &E : *VerDef->Entries) + for (StringRef Name : E.VerNames) + DotDynstr.add(Name); } } diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index a5e5894af04d4..ebda4cca97c83 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -1074,7 +1074,8 @@ static void sectionMapping(IO &IO, ELFYAML::NoBitsSection &Section) { static void sectionMapping(IO &IO, ELFYAML::VerdefSection &Section) { commonSectionMapping(IO, Section); IO.mapRequired("Info", Section.Info); - IO.mapRequired("Entries", Section.Entries); + IO.mapOptional("Entries", Section.Entries); + IO.mapOptional("Content", Section.Content); } static void sectionMapping(IO &IO, ELFYAML::SymverSection &Section) { @@ -1419,6 +1420,13 @@ StringRef MappingTraits>::validate( return {}; } + if (const auto *VD = dyn_cast(C.get())) { + if (VD->Entries && VD->Content) + return "SHT_GNU_verdef: \"Entries\" and \"Content\" can't be used " + "together"; + return {}; + } + return {}; } diff --git a/llvm/test/tools/yaml2obj/ELF/verdef-section.yaml b/llvm/test/tools/yaml2obj/ELF/verdef-section.yaml index 439c428c19344..77798000ee68f 100644 --- a/llvm/test/tools/yaml2obj/ELF/verdef-section.yaml +++ b/llvm/test/tools/yaml2obj/ELF/verdef-section.yaml @@ -1,6 +1,6 @@ ## Check we are able to handle SHT_GNU_verdef sections. 
-# RUN: yaml2obj %s -o %t +# RUN: yaml2obj --docnum=1 %s -o %t1 # RUN: llvm-readobj -V %t | FileCheck %s # CHECK: VersionDefinitions [ @@ -73,4 +73,83 @@ Sections: DynamicSymbols: - Name: foo Binding: STB_GLOBAL -... + +## Check we can use "Content" to describe the content. + +# RUN: yaml2obj --docnum=2 %s -o %t2 +# RUN: llvm-readobj --sections --section-data %t2 | FileCheck %s --check-prefix=CONTENT + +# CONTENT: Name: .gnu.version_d +# CONTENT-NEXT: Type: SHT_GNU_verdef +# CONTENT-NEXT: Flags [ (0x2) +# CONTENT-NEXT: SHF_ALLOC (0x2) +# CONTENT-NEXT: ] +# CONTENT-NEXT: Address: 0x0 +# CONTENT-NEXT: Offset: 0x40 +# CONTENT-NEXT: Size: 3 +# CONTENT-NEXT: Link: 0 +# CONTENT-NEXT: Info: 1 +# CONTENT-NEXT: AddressAlignment: +# CONTENT-NEXT: EntrySize: +# CONTENT-NEXT: SectionData ( +# CONTENT-NEXT: 0000: 112233 +# CONTENT-NEXT: ) + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Flags: [ SHF_ALLOC ] + Info: 0x0000000000000001 + Content: "112233" + +## Check we can omit "Content" and "Entries" fields to produce an empty SHT_GNU_verdef section. + +# RUN: yaml2obj --docnum=3 %s -o %t3 +# RUN: llvm-readelf --sections %t3 | FileCheck %s --check-prefix=NO-PROPS + +# NO-PROPS: [Nr] Name Type Address Off Size ES Flg Lk Inf Al +# NO-PROPS: [ 1] .gnu.version_d VERDEF 0000000000000000 000040 000000 00 A 0 1 0 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Flags: [ SHF_ALLOC ] + Info: 0x0000000000000001 + +## Check we can't use both "Entries" and "Content" together. 
+ +# RUN: not yaml2obj --docnum=4 %s -o %t4 2>&1 | FileCheck %s --check-prefix=BOTH + +# BOTH: error: SHT_GNU_verdef: "Entries" and "Content" can't be used together + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Flags: [ SHF_ALLOC ] + Info: 0x0000000000000001 + Content: "112233" + Entries: + - Version: 0 + Flags: 0 + VersionNdx: 0 + Hash: 0 + Names: + - foo diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp index 3dc48b8b8802a..77d28d85e6a11 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -923,6 +923,8 @@ ELFDumper::dumpVerdefSection(const Elf_Shdr *Shdr) { if (!Contents) return Contents.takeError(); + S->Entries.emplace(); + llvm::ArrayRef Data = *Contents; const uint8_t *Buf = Data.data(); while (Buf) { @@ -942,7 +944,7 @@ ELFDumper::dumpVerdefSection(const Elf_Shdr *Shdr) { BufAux = Verdaux->vda_next ? BufAux + Verdaux->vda_next : nullptr; } - S->Entries.push_back(Entry); + S->Entries->push_back(Entry); Buf = Verdef->vd_next ? Buf + Verdef->vd_next : nullptr; } From c547c22f18973dceaf5b40dae1b4ad7d3dd4eab7 Mon Sep 17 00:00:00 2001 From: Kirill Bobyrev Date: Tue, 26 Nov 2019 13:45:04 +0100 Subject: [PATCH 030/591] [NFC] ASSERT_EQ before accessing items in containers As discussed offline, something different from `EXPECT_EQ` should be used to check if the container contains enough items before accessing them so that other tests can still be run even if the assertion fails as opposed to having `EXPECT_EQ` failing and then aborting the run due to the errors caused by out-of-bounds memory access. 
Reviewed by: ilya-biryukov Differential Revision: https://reviews.llvm.org/D70528 --- clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp | 5 ++++- clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp index cb6d611503199..28f18e73d7a85 100644 --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp @@ -1874,7 +1874,10 @@ TEST(CompletionTest, CompletionTokenRange) { Annotations TestCode(Text); auto Results = completions(Server, TestCode.code(), TestCode.point()); - EXPECT_EQ(Results.Completions.size(), 1u); + if (Results.Completions.size() != 1) { + ADD_FAILURE() << "Results.Completions.size() != 1"; + continue; + } EXPECT_THAT(Results.Completions.front().CompletionTokenRange, TestCode.range()); } diff --git a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp index fe7a8898c5de4..3c0257849021d 100644 --- a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp +++ b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp @@ -709,7 +709,10 @@ void bar(X *x) { auto Parsed = TU.build(); for (const auto &D : Parsed.getDiagnostics()) { - EXPECT_EQ(D.Fixes.size(), 1u); + if (D.Fixes.size() != 1) { + ADD_FAILURE() << "D.Fixes.size() != 1"; + continue; + } EXPECT_EQ(D.Fixes[0].Message, std::string("Add include \"a.h\" for symbol X")); } From 8805316172a650d62cdb6d3854ef0e54fb300ca6 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Tue, 19 Nov 2019 15:23:36 +0100 Subject: [PATCH 031/591] [clangd] Speed up when building rename edit. Summary: We used to scan the code everytime when computing the LSP position to the offset (respect the LSP encoding). Now we only scan the source code once. 
Reviewers: ilya-biryukov Subscribers: MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D70441 --- clang-tools-extra/clangd/refactor/Rename.cpp | 90 ++++++++++++------- clang-tools-extra/clangd/refactor/Rename.h | 7 ++ .../clangd/unittests/RenameTests.cpp | 27 ++++++ 3 files changed, 90 insertions(+), 34 deletions(-) diff --git a/clang-tools-extra/clangd/refactor/Rename.cpp b/clang-tools-extra/clangd/refactor/Rename.cpp index d4b186b4ca909..ab121d434c9c3 100644 --- a/clang-tools-extra/clangd/refactor/Rename.cpp +++ b/clang-tools-extra/clangd/refactor/Rename.cpp @@ -20,6 +20,7 @@ #include "clang/Tooling/Refactoring/Rename/USRFindingAction.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Error.h" +#include "llvm/Support/FormatVariadic.h" namespace clang { namespace clangd { @@ -297,34 +298,6 @@ findOccurrencesOutsideFile(const NamedDecl &RenameDecl, return AffectedFiles; } -llvm::Expected> toRangeOffset(const clangd::Range &R, - llvm::StringRef Code) { - auto StartOffset = positionToOffset(Code, R.start); - if (!StartOffset) - return StartOffset.takeError(); - auto EndOffset = positionToOffset(Code, R.end); - if (!EndOffset) - return EndOffset.takeError(); - return std::make_pair(*StartOffset, *EndOffset); -}; - -llvm::Expected buildRenameEdit(llvm::StringRef InitialCode, - const std::vector &Occurrences, - llvm::StringRef NewName) { - tooling::Replacements RenameEdit; - for (const Range &Occurrence : Occurrences) { - // FIXME: !positionToOffset is O(N), optimize it. 
- auto RangeOffset = toRangeOffset(Occurrence, InitialCode); - if (!RangeOffset) - return RangeOffset.takeError(); - auto ByteLength = RangeOffset->second - RangeOffset->first; - if (auto Err = RenameEdit.add(tooling::Replacement( - InitialCode, RangeOffset->first, ByteLength, NewName))) - return std::move(Err); - } - return Edit(InitialCode, std::move(RenameEdit)); -} - // Index-based rename, it renames all occurrences outside of the main file. // // The cross-file rename is purely based on the index, as we don't want to @@ -358,7 +331,7 @@ llvm::Expected renameOutsideFile( llvm::inconvertibleErrorCode()); FileEdits Results; - for (const auto &FileAndOccurrences : AffectedFiles) { + for (auto &FileAndOccurrences : AffectedFiles) { llvm::StringRef FilePath = FileAndOccurrences.first(); auto AffectedFileCode = GetFileContent(FilePath); @@ -366,11 +339,14 @@ llvm::Expected renameOutsideFile( elog("Fail to read file content: {0}", AffectedFileCode.takeError()); continue; } - - auto RenameEdit = buildRenameEdit(*AffectedFileCode, - FileAndOccurrences.getValue(), NewName); - if (!RenameEdit) - return RenameEdit.takeError(); + auto RenameEdit = buildRenameEdit( + *AffectedFileCode, std::move(FileAndOccurrences.second), NewName); + if (!RenameEdit) { + return llvm::make_error( + llvm::formatv("fail to build rename edit for file {0}: {1}", FilePath, + llvm::toString(RenameEdit.takeError())), + llvm::inconvertibleErrorCode()); + } if (!RenameEdit->Replacements.empty()) Results.insert({FilePath, std::move(*RenameEdit)}); } @@ -465,5 +441,51 @@ llvm::Expected rename(const RenameInputs &RInputs) { return Results; } +llvm::Expected buildRenameEdit(llvm::StringRef InitialCode, + std::vector Occurrences, + llvm::StringRef NewName) { + llvm::sort(Occurrences); + // These two always correspond to the same position. 
+ Position LastPos{0, 0}; + size_t LastOffset = 0; + + auto Offset = [&](const Position &P) -> llvm::Expected { + assert(LastPos <= P && "malformed input"); + Position Shifted = { + P.line - LastPos.line, + P.line > LastPos.line ? P.character : P.character - LastPos.character}; + auto ShiftedOffset = + positionToOffset(InitialCode.substr(LastOffset), Shifted); + if (!ShiftedOffset) + return llvm::make_error( + llvm::formatv("fail to convert the position {0} to offset ({1})", P, + llvm::toString(ShiftedOffset.takeError())), + llvm::inconvertibleErrorCode()); + LastPos = P; + LastOffset += *ShiftedOffset; + return LastOffset; + }; + + std::vector> OccurrencesOffsets; + for (const auto &R : Occurrences) { + auto StartOffset = Offset(R.start); + if (!StartOffset) + return StartOffset.takeError(); + auto EndOffset = Offset(R.end); + if (!EndOffset) + return EndOffset.takeError(); + OccurrencesOffsets.push_back({*StartOffset, *EndOffset}); + } + + tooling::Replacements RenameEdit; + for (const auto &R : OccurrencesOffsets) { + auto ByteLength = R.second - R.first; + if (auto Err = RenameEdit.add( + tooling::Replacement(InitialCode, R.first, ByteLength, NewName))) + return std::move(Err); + } + return Edit(InitialCode, std::move(RenameEdit)); +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/refactor/Rename.h b/clang-tools-extra/clangd/refactor/Rename.h index 1427d7042585b..c8cfc6d058923 100644 --- a/clang-tools-extra/clangd/refactor/Rename.h +++ b/clang-tools-extra/clangd/refactor/Rename.h @@ -47,6 +47,13 @@ struct RenameInputs { /// in another file (per the index). llvm::Expected rename(const RenameInputs &RInputs); +/// Generates rename edits that replaces all given occurrences with the +/// NewName. +/// Exposed for testing only. 
+llvm::Expected buildRenameEdit(llvm::StringRef InitialCode, + std::vector Occurrences, + llvm::StringRef NewName); + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/unittests/RenameTests.cpp b/clang-tools-extra/clangd/unittests/RenameTests.cpp index 75b15e735abf7..47aca380f3e9d 100644 --- a/clang-tools-extra/clangd/unittests/RenameTests.cpp +++ b/clang-tools-extra/clangd/unittests/RenameTests.cpp @@ -638,6 +638,33 @@ TEST(CrossFileRenameTests, CrossFileOnLocalSymbol) { UnorderedElementsAre(Pair(Eq(Path), Eq(expectedResult(Code, NewName))))); } +TEST(CrossFileRenameTests, BuildRenameEdits) { + Annotations Code("[[😂]]"); + auto LSPRange = Code.range(); + auto Edit = buildRenameEdit(Code.code(), {LSPRange}, "abc"); + ASSERT_TRUE(bool(Edit)) << Edit.takeError(); + ASSERT_EQ(1UL, Edit->Replacements.size()); + EXPECT_EQ(4UL, Edit->Replacements.begin()->getLength()); + + // Test invalid range. + LSPRange.end = {10, 0}; // out of range + Edit = buildRenameEdit(Code.code(), {LSPRange}, "abc"); + EXPECT_FALSE(Edit); + EXPECT_THAT(llvm::toString(Edit.takeError()), + testing::HasSubstr("fail to convert")); + + // Normal ascii characters. + Annotations T(R"cpp( + [[range]] + [[range]] + [[range]] + )cpp"); + Edit = buildRenameEdit(T.code(), T.ranges(), "abc"); + ASSERT_TRUE(bool(Edit)) << Edit.takeError(); + EXPECT_EQ(applyEdits(FileEdits{{T.code(), std::move(*Edit)}}).front().second, + expectedResult(Code, expectedResult(T, "abc"))); +} + } // namespace } // namespace clangd } // namespace clang From 30fc94be237f26d3127d8bbc872d9e3b82f03590 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Tue, 26 Nov 2019 13:42:16 +0100 Subject: [PATCH 032/591] [lldb][NFC] Extract type modifier parsing from DWARFASTParserClang::ParseTypeFromDWARF Part of the work to split up this monolithic parsing function. 
--- .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 431 +++++++++--------- .../SymbolFile/DWARF/DWARFASTParserClang.h | 3 + 2 files changed, 226 insertions(+), 208 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 17e0924e3e588..8ead4ea4f519f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -463,13 +463,9 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, const dw_tag_t tag = die.Tag(); - Type::ResolveState resolve_state = Type::ResolveState::Unresolved; - - Type::EncodingDataType encoding_data_type = Type::eEncodingIsUID; CompilerType clang_type; - TypeSP type_sp; - LanguageType cu_language = die.GetLanguage(); + switch (tag) { case DW_TAG_typedef: case DW_TAG_base_type: @@ -480,209 +476,9 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, case DW_TAG_restrict_type: case DW_TAG_volatile_type: case DW_TAG_unspecified_type: { - if (tag == DW_TAG_typedef && attrs.type.IsValid()) { - // Try to parse a typedef from the (DWARF embedded in the) Clang - // module file first as modules can contain typedef'ed - // structures that have no names like: - // - // typedef struct { int a; } Foo; - // - // In this case we will have a structure with no name and a - // typedef named "Foo" that points to this unnamed - // structure. The name in the typedef is the only identifier for - // the struct, so always try to get typedefs from Clang modules - // if possible. - // - // The type_sp returned will be empty if the typedef doesn't - // exist in a module file, so it is cheap to call this function - // just to check. - // - // If we don't do this we end up creating a TypeSP that says - // this is a typedef to type 0x123 (the DW_AT_type value would - // be 0x123 in the DW_TAG_typedef), and this is the unnamed - // structure type. 
We will have a hard time tracking down an - // unnammed structure type in the module debug info, so we make - // sure we don't get into this situation by always resolving - // typedefs from the module. - const DWARFDIE encoding_die = attrs.type.Reference(); - - // First make sure that the die that this is typedef'ed to _is_ - // just a declaration (DW_AT_declaration == 1), not a full - // definition since template types can't be represented in - // modules since only concrete instances of templates are ever - // emitted and modules won't contain those - if (encoding_die && - encoding_die.GetAttributeValueAsUnsigned(DW_AT_declaration, 0) == 1) { - type_sp = ParseTypeFromClangModule(sc, die, log); - if (type_sp) - return type_sp; - } - } - - DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\") type => 0x%8.8lx\n", - die.GetID(), DW_TAG_value_to_name(tag), type_name_cstr, - encoding_uid.Reference()); - - switch (tag) { - default: - break; - - case DW_TAG_unspecified_type: - if (attrs.name == "nullptr_t" || attrs.name == "decltype(nullptr)") { - resolve_state = Type::ResolveState::Full; - clang_type = m_ast.GetBasicType(eBasicTypeNullPtr); - break; - } - // Fall through to base type below in case we can handle the type - // there... 
- LLVM_FALLTHROUGH; - - case DW_TAG_base_type: - resolve_state = Type::ResolveState::Full; - clang_type = m_ast.GetBuiltinTypeForDWARFEncodingAndBitSize( - attrs.name.GetCString(), attrs.encoding, - attrs.byte_size.getValueOr(0) * 8); - break; - - case DW_TAG_pointer_type: - encoding_data_type = Type::eEncodingIsPointerUID; - break; - case DW_TAG_reference_type: - encoding_data_type = Type::eEncodingIsLValueReferenceUID; - break; - case DW_TAG_rvalue_reference_type: - encoding_data_type = Type::eEncodingIsRValueReferenceUID; - break; - case DW_TAG_typedef: - encoding_data_type = Type::eEncodingIsTypedefUID; - break; - case DW_TAG_const_type: - encoding_data_type = Type::eEncodingIsConstUID; - break; - case DW_TAG_restrict_type: - encoding_data_type = Type::eEncodingIsRestrictUID; - break; - case DW_TAG_volatile_type: - encoding_data_type = Type::eEncodingIsVolatileUID; - break; - } - - if (!clang_type && (encoding_data_type == Type::eEncodingIsPointerUID || - encoding_data_type == Type::eEncodingIsTypedefUID)) { - if (tag == DW_TAG_pointer_type) { - DWARFDIE target_die = die.GetReferencedDIE(DW_AT_type); - - if (target_die.GetAttributeValueAsUnsigned(DW_AT_APPLE_block, 0)) { - // Blocks have a __FuncPtr inside them which is a pointer to a - // function of the proper type. 
- - for (DWARFDIE child_die = target_die.GetFirstChild(); - child_die.IsValid(); child_die = child_die.GetSibling()) { - if (!strcmp(child_die.GetAttributeValueAsString(DW_AT_name, ""), - "__FuncPtr")) { - DWARFDIE function_pointer_type = - child_die.GetReferencedDIE(DW_AT_type); - - if (function_pointer_type) { - DWARFDIE function_type = - function_pointer_type.GetReferencedDIE(DW_AT_type); - - bool function_type_is_new_pointer; - TypeSP lldb_function_type_sp = ParseTypeFromDWARF( - sc, function_type, &function_type_is_new_pointer); - - if (lldb_function_type_sp) { - clang_type = m_ast.CreateBlockPointerType( - lldb_function_type_sp->GetForwardCompilerType()); - encoding_data_type = Type::eEncodingIsUID; - attrs.type.Clear(); - resolve_state = Type::ResolveState::Full; - } - } - - break; - } - } - } - } - - if (cu_language == eLanguageTypeObjC || - cu_language == eLanguageTypeObjC_plus_plus) { - if (attrs.name) { - static ConstString g_objc_type_name_id("id"); - static ConstString g_objc_type_name_Class("Class"); - static ConstString g_objc_type_name_selector("SEL"); - - if (attrs.name == g_objc_type_name_id) { - if (log) - dwarf->GetObjectFile()->GetModule()->LogMessage( - log, - "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s '%s' " - "is Objective-C 'id' built-in type.", - die.GetOffset(), die.GetTagAsCString(), die.GetName()); - clang_type = m_ast.GetBasicType(eBasicTypeObjCID); - encoding_data_type = Type::eEncodingIsUID; - attrs.type.Clear(); - resolve_state = Type::ResolveState::Full; - - } else if (attrs.name == g_objc_type_name_Class) { - if (log) - dwarf->GetObjectFile()->GetModule()->LogMessage( - log, - "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s '%s' " - "is Objective-C 'Class' built-in type.", - die.GetOffset(), die.GetTagAsCString(), die.GetName()); - clang_type = m_ast.GetBasicType(eBasicTypeObjCClass); - encoding_data_type = Type::eEncodingIsUID; - attrs.type.Clear(); - resolve_state = Type::ResolveState::Full; - } else if (attrs.name == 
g_objc_type_name_selector) { - if (log) - dwarf->GetObjectFile()->GetModule()->LogMessage( - log, - "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s '%s' " - "is Objective-C 'selector' built-in type.", - die.GetOffset(), die.GetTagAsCString(), die.GetName()); - clang_type = m_ast.GetBasicType(eBasicTypeObjCSel); - encoding_data_type = Type::eEncodingIsUID; - attrs.type.Clear(); - resolve_state = Type::ResolveState::Full; - } - } else if (encoding_data_type == Type::eEncodingIsPointerUID && - attrs.type.IsValid()) { - // Clang sometimes erroneously emits id as objc_object*. In that - // case we fix up the type to "id". - - const DWARFDIE encoding_die = attrs.type.Reference(); - - if (encoding_die && encoding_die.Tag() == DW_TAG_structure_type) { - if (const char *struct_name = encoding_die.GetName()) { - if (!strcmp(struct_name, "objc_object")) { - if (log) - dwarf->GetObjectFile()->GetModule()->LogMessage( - log, - "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s " - "'%s' is 'objc_object*', which we overrode to " - "'id'.", - die.GetOffset(), die.GetTagAsCString(), die.GetName()); - clang_type = m_ast.GetBasicType(eBasicTypeObjCID); - encoding_data_type = Type::eEncodingIsUID; - attrs.type.Clear(); - resolve_state = Type::ResolveState::Full; - } - } - } - } - } - } - - type_sp = std::make_shared( - die.GetID(), dwarf, attrs.name, attrs.byte_size, nullptr, - dwarf->GetUID(attrs.type.Reference()), encoding_data_type, &attrs.decl, - clang_type, resolve_state); - - dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); - } break; + type_sp = ParseTypeModifier(sc, die, attrs); + break; + } case DW_TAG_structure_type: case DW_TAG_union_type: @@ -726,6 +522,225 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, return UpdateSymbolContextScopeForType(sc, die, type_sp); } +lldb::TypeSP +DWARFASTParserClang::ParseTypeModifier(const SymbolContext &sc, + const DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs) { + Log 
*log(LogChannelDWARF::GetLogIfAny(DWARF_LOG_TYPE_COMPLETION | + DWARF_LOG_LOOKUPS)); + SymbolFileDWARF *dwarf = die.GetDWARF(); + const dw_tag_t tag = die.Tag(); + LanguageType cu_language = die.GetLanguage(); + Type::ResolveState resolve_state = Type::ResolveState::Unresolved; + Type::EncodingDataType encoding_data_type = Type::eEncodingIsUID; + TypeSP type_sp; + CompilerType clang_type; + + if (tag == DW_TAG_typedef && attrs.type.IsValid()) { + // Try to parse a typedef from the (DWARF embedded in the) Clang + // module file first as modules can contain typedef'ed + // structures that have no names like: + // + // typedef struct { int a; } Foo; + // + // In this case we will have a structure with no name and a + // typedef named "Foo" that points to this unnamed + // structure. The name in the typedef is the only identifier for + // the struct, so always try to get typedefs from Clang modules + // if possible. + // + // The type_sp returned will be empty if the typedef doesn't + // exist in a module file, so it is cheap to call this function + // just to check. + // + // If we don't do this we end up creating a TypeSP that says + // this is a typedef to type 0x123 (the DW_AT_type value would + // be 0x123 in the DW_TAG_typedef), and this is the unnamed + // structure type. We will have a hard time tracking down an + // unnammed structure type in the module debug info, so we make + // sure we don't get into this situation by always resolving + // typedefs from the module. 
+ const DWARFDIE encoding_die = attrs.type.Reference(); + + // First make sure that the die that this is typedef'ed to _is_ + // just a declaration (DW_AT_declaration == 1), not a full + // definition since template types can't be represented in + // modules since only concrete instances of templates are ever + // emitted and modules won't contain those + if (encoding_die && + encoding_die.GetAttributeValueAsUnsigned(DW_AT_declaration, 0) == 1) { + type_sp = ParseTypeFromClangModule(sc, die, log); + if (type_sp) + return type_sp; + } + } + + DEBUG_PRINTF("0x%8.8" PRIx64 ": %s (\"%s\") type => 0x%8.8lx\n", die.GetID(), + DW_TAG_value_to_name(tag), type_name_cstr, + encoding_uid.Reference()); + + switch (tag) { + default: + break; + + case DW_TAG_unspecified_type: + if (attrs.name == "nullptr_t" || attrs.name == "decltype(nullptr)") { + resolve_state = Type::ResolveState::Full; + clang_type = m_ast.GetBasicType(eBasicTypeNullPtr); + break; + } + // Fall through to base type below in case we can handle the type + // there... 
+ LLVM_FALLTHROUGH; + + case DW_TAG_base_type: + resolve_state = Type::ResolveState::Full; + clang_type = m_ast.GetBuiltinTypeForDWARFEncodingAndBitSize( + attrs.name.GetCString(), attrs.encoding, + attrs.byte_size.getValueOr(0) * 8); + break; + + case DW_TAG_pointer_type: + encoding_data_type = Type::eEncodingIsPointerUID; + break; + case DW_TAG_reference_type: + encoding_data_type = Type::eEncodingIsLValueReferenceUID; + break; + case DW_TAG_rvalue_reference_type: + encoding_data_type = Type::eEncodingIsRValueReferenceUID; + break; + case DW_TAG_typedef: + encoding_data_type = Type::eEncodingIsTypedefUID; + break; + case DW_TAG_const_type: + encoding_data_type = Type::eEncodingIsConstUID; + break; + case DW_TAG_restrict_type: + encoding_data_type = Type::eEncodingIsRestrictUID; + break; + case DW_TAG_volatile_type: + encoding_data_type = Type::eEncodingIsVolatileUID; + break; + } + + if (!clang_type && (encoding_data_type == Type::eEncodingIsPointerUID || + encoding_data_type == Type::eEncodingIsTypedefUID)) { + if (tag == DW_TAG_pointer_type) { + DWARFDIE target_die = die.GetReferencedDIE(DW_AT_type); + + if (target_die.GetAttributeValueAsUnsigned(DW_AT_APPLE_block, 0)) { + // Blocks have a __FuncPtr inside them which is a pointer to a + // function of the proper type. 
+ + for (DWARFDIE child_die = target_die.GetFirstChild(); + child_die.IsValid(); child_die = child_die.GetSibling()) { + if (!strcmp(child_die.GetAttributeValueAsString(DW_AT_name, ""), + "__FuncPtr")) { + DWARFDIE function_pointer_type = + child_die.GetReferencedDIE(DW_AT_type); + + if (function_pointer_type) { + DWARFDIE function_type = + function_pointer_type.GetReferencedDIE(DW_AT_type); + + bool function_type_is_new_pointer; + TypeSP lldb_function_type_sp = ParseTypeFromDWARF( + sc, function_type, &function_type_is_new_pointer); + + if (lldb_function_type_sp) { + clang_type = m_ast.CreateBlockPointerType( + lldb_function_type_sp->GetForwardCompilerType()); + encoding_data_type = Type::eEncodingIsUID; + attrs.type.Clear(); + resolve_state = Type::ResolveState::Full; + } + } + + break; + } + } + } + } + + if (cu_language == eLanguageTypeObjC || + cu_language == eLanguageTypeObjC_plus_plus) { + if (attrs.name) { + static ConstString g_objc_type_name_id("id"); + static ConstString g_objc_type_name_Class("Class"); + static ConstString g_objc_type_name_selector("SEL"); + + if (attrs.name == g_objc_type_name_id) { + if (log) + dwarf->GetObjectFile()->GetModule()->LogMessage( + log, + "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s '%s' " + "is Objective-C 'id' built-in type.", + die.GetOffset(), die.GetTagAsCString(), die.GetName()); + clang_type = m_ast.GetBasicType(eBasicTypeObjCID); + encoding_data_type = Type::eEncodingIsUID; + attrs.type.Clear(); + resolve_state = Type::ResolveState::Full; + + } else if (attrs.name == g_objc_type_name_Class) { + if (log) + dwarf->GetObjectFile()->GetModule()->LogMessage( + log, + "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s '%s' " + "is Objective-C 'Class' built-in type.", + die.GetOffset(), die.GetTagAsCString(), die.GetName()); + clang_type = m_ast.GetBasicType(eBasicTypeObjCClass); + encoding_data_type = Type::eEncodingIsUID; + attrs.type.Clear(); + resolve_state = Type::ResolveState::Full; + } else if (attrs.name == 
g_objc_type_name_selector) { + if (log) + dwarf->GetObjectFile()->GetModule()->LogMessage( + log, + "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s '%s' " + "is Objective-C 'selector' built-in type.", + die.GetOffset(), die.GetTagAsCString(), die.GetName()); + clang_type = m_ast.GetBasicType(eBasicTypeObjCSel); + encoding_data_type = Type::eEncodingIsUID; + attrs.type.Clear(); + resolve_state = Type::ResolveState::Full; + } + } else if (encoding_data_type == Type::eEncodingIsPointerUID && + attrs.type.IsValid()) { + // Clang sometimes erroneously emits id as objc_object*. In that + // case we fix up the type to "id". + + const DWARFDIE encoding_die = attrs.type.Reference(); + + if (encoding_die && encoding_die.Tag() == DW_TAG_structure_type) { + if (const char *struct_name = encoding_die.GetName()) { + if (!strcmp(struct_name, "objc_object")) { + if (log) + dwarf->GetObjectFile()->GetModule()->LogMessage( + log, + "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s " + "'%s' is 'objc_object*', which we overrode to " + "'id'.", + die.GetOffset(), die.GetTagAsCString(), die.GetName()); + clang_type = m_ast.GetBasicType(eBasicTypeObjCID); + encoding_data_type = Type::eEncodingIsUID; + attrs.type.Clear(); + resolve_state = Type::ResolveState::Full; + } + } + } + } + } + } + + type_sp = std::make_shared( + die.GetID(), dwarf, attrs.name, attrs.byte_size, nullptr, + dwarf->GetUID(attrs.type.Reference()), encoding_data_type, &attrs.decl, + clang_type, resolve_state); + + dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); + return type_sp; +} + TypeSP DWARFASTParserClang::ParseEnum(const SymbolContext &sc, const DWARFDIE &die, ParsedDWARFTypeAttributes &attrs) { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index 0bca18ccd5d55..a8963bbbca1bf 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -170,6 
+170,9 @@ class DWARFASTParserClang : public DWARFASTParser { lldb::ModuleSP GetModuleForType(const DWARFDIE &die); private: + lldb::TypeSP ParseTypeModifier(const lldb_private::SymbolContext &sc, + const DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs); lldb::TypeSP ParseEnum(const lldb_private::SymbolContext &sc, const DWARFDIE &die, ParsedDWARFTypeAttributes &attrs); lldb::TypeSP ParseSubroutine(const DWARFDIE &die, From 4023bd05fcb4f75c6a11d661f407f8e0896176e2 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 26 Nov 2019 11:00:23 +0100 Subject: [PATCH 033/591] [lldb] Add boilerplate to recognize the .debug_rnglists.dwo section --- lldb/include/lldb/lldb-enumerations.h | 1 + lldb/source/Core/Section.cpp | 2 ++ .../Plugins/ObjectFile/ELF/ObjectFileELF.cpp | 1 + .../Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp | 1 + .../Plugins/SymbolFile/DWARF/DWARFContext.cpp | 3 ++- lldb/source/Symbol/ObjectFile.cpp | 1 + lldb/test/Shell/ObjectFile/ELF/section-types.yaml | 14 ++++++++++++++ 7 files changed, 22 insertions(+), 1 deletion(-) diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h index 3c80bcffec20e..0a92365544f99 100644 --- a/lldb/include/lldb/lldb-enumerations.h +++ b/lldb/include/lldb/lldb-enumerations.h @@ -690,6 +690,7 @@ enum SectionType { eSectionTypeDWARFDebugStrDwo, eSectionTypeDWARFDebugStrOffsetsDwo, eSectionTypeDWARFDebugTypesDwo, + eSectionTypeDWARFDebugRngListsDwo, }; FLAGS_ENUM(EmulateInstructionOptions){ diff --git a/lldb/source/Core/Section.cpp b/lldb/source/Core/Section.cpp index 7615dc1d65c7f..e8fcca4603dfb 100644 --- a/lldb/source/Core/Section.cpp +++ b/lldb/source/Core/Section.cpp @@ -94,6 +94,8 @@ const char *Section::GetTypeAsCString() const { return "dwarf-ranges"; case eSectionTypeDWARFDebugRngLists: return "dwarf-rnglists"; + case eSectionTypeDWARFDebugRngListsDwo: + return "dwarf-rnglists-dwo"; case eSectionTypeDWARFDebugStr: return "dwarf-str"; case eSectionTypeDWARFDebugStrDwo: diff --git 
a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp index 3f8502548fc25..8eadaf1323d55 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -1581,6 +1581,7 @@ static SectionType GetSectionTypeFromName(llvm::StringRef Name) { .Case("pubtypes", eSectionTypeDWARFDebugPubTypes) .Case("ranges", eSectionTypeDWARFDebugRanges) .Case("rnglists", eSectionTypeDWARFDebugRngLists) + .Case("rnglists.dwo", eSectionTypeDWARFDebugRngListsDwo) .Case("str", eSectionTypeDWARFDebugStr) .Case("str.dwo", eSectionTypeDWARFDebugStrDwo) .Case("str_offsets", eSectionTypeDWARFDebugStrOffsets) diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index aff1d1e87bb67..57c43de0c945d 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -1140,6 +1140,7 @@ AddressClass ObjectFileMachO::GetAddressClass(lldb::addr_t file_addr) { case eSectionTypeDWARFDebugPubTypes: case eSectionTypeDWARFDebugRanges: case eSectionTypeDWARFDebugRngLists: + case eSectionTypeDWARFDebugRngListsDwo: case eSectionTypeDWARFDebugStr: case eSectionTypeDWARFDebugStrDwo: case eSectionTypeDWARFDebugStrOffsets: diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp index eb307ce1cce1b..db8d7b3747ecd 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp @@ -81,7 +81,8 @@ const DWARFDataExtractor &DWARFContext::getOrLoadRangesData() { } const DWARFDataExtractor &DWARFContext::getOrLoadRngListsData() { - return LoadOrGetSection(eSectionTypeDWARFDebugRngLists, llvm::None, + return LoadOrGetSection(eSectionTypeDWARFDebugRngLists, + eSectionTypeDWARFDebugRngListsDwo, m_data_debug_rnglists); } diff --git 
a/lldb/source/Symbol/ObjectFile.cpp b/lldb/source/Symbol/ObjectFile.cpp index 38bc7722d0d02..4f6d74bbc7579 100644 --- a/lldb/source/Symbol/ObjectFile.cpp +++ b/lldb/source/Symbol/ObjectFile.cpp @@ -360,6 +360,7 @@ AddressClass ObjectFile::GetAddressClass(addr_t file_addr) { case eSectionTypeDWARFDebugPubTypes: case eSectionTypeDWARFDebugRanges: case eSectionTypeDWARFDebugRngLists: + case eSectionTypeDWARFDebugRngListsDwo: case eSectionTypeDWARFDebugStr: case eSectionTypeDWARFDebugStrDwo: case eSectionTypeDWARFDebugStrOffsets: diff --git a/lldb/test/Shell/ObjectFile/ELF/section-types.yaml b/lldb/test/Shell/ObjectFile/ELF/section-types.yaml index 9f6b4c0533b91..caac76a789ce0 100644 --- a/lldb/test/Shell/ObjectFile/ELF/section-types.yaml +++ b/lldb/test/Shell/ObjectFile/ELF/section-types.yaml @@ -13,6 +13,12 @@ # CHECK-LABEL: Name: .debug_types.dwo # CHECK-NEXT: Type: dwarf-types-dwo +# CHECK-LABEL: Name: .debug_rnglists +# CHECK-NEXT: Type: dwarf-rnglists + +# CHECK-LABEL: Name: .debug_rnglists.dwo +# CHECK-NEXT: Type: dwarf-rnglists-dwo + # CHECK-LABEL: Name: .debug_names # CHECK-NEXT: Type: dwarf-names @@ -58,6 +64,14 @@ Sections: Type: SHT_PROGBITS AddressAlign: 0x0000000000000001 Content: DEADBEEFBAADF00D + - Name: .debug_rnglists + Type: SHT_PROGBITS + AddressAlign: 0x0000000000000001 + Content: DEADBEEFBAADF00D + - Name: .debug_rnglists.dwo + Type: SHT_PROGBITS + AddressAlign: 0x0000000000000001 + Content: DEADBEEFBAADF00D - Name: .debug_names Type: SHT_PROGBITS AddressAlign: 0x0000000000000001 From 0181338ddae26230d4067fdc00c2f7218f1d64d7 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Tue, 26 Nov 2019 14:01:10 +0100 Subject: [PATCH 034/591] [lldb][NFC] Simplify structure parsing code in DWARFASTParserClang::ParseTypeFromDWARF This way it looks more like the code around it. The assert is also gone as it just checks that the variables we declare directly above were not initialized by anyone. That made more sense when this was one large function. 
--- .../source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 8ead4ea4f519f..78c5af4821910 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -483,10 +483,8 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, case DW_TAG_structure_type: case DW_TAG_union_type: case DW_TAG_class_type: { - assert((!type_sp && !clang_type) && - "Did not expect partially computed structure-like type"); - TypeSP struct_like_type_sp = ParseStructureLikeDIE(sc, die, attrs); - return UpdateSymbolContextScopeForType(sc, die, struct_like_type_sp); + type_sp = ParseStructureLikeDIE(sc, die, attrs); + break; } case DW_TAG_enumeration_type: { From 91827ebf5e3d9eb633ba909e8f50a3d402698cce Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Tue, 26 Nov 2019 16:01:47 +0300 Subject: [PATCH 035/591] =?UTF-8?q?[yaml2obj]=20-=20Fix=20BB=20after=20?= =?UTF-8?q?=C2=AB[yaml2obj]=20-=20Teach=20tool=20to=20describe=20SHT=5FGNU?= =?UTF-8?q?=5Fverdef=20section=20with=20a=20"Content"=20property.=C2=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixed a temporary file name. BB: http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-ubuntu/builds/669 --- llvm/test/tools/yaml2obj/ELF/verdef-section.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/tools/yaml2obj/ELF/verdef-section.yaml b/llvm/test/tools/yaml2obj/ELF/verdef-section.yaml index 77798000ee68f..9493ab460b70b 100644 --- a/llvm/test/tools/yaml2obj/ELF/verdef-section.yaml +++ b/llvm/test/tools/yaml2obj/ELF/verdef-section.yaml @@ -1,7 +1,7 @@ ## Check we are able to handle SHT_GNU_verdef sections. 
# RUN: yaml2obj --docnum=1 %s -o %t1 -# RUN: llvm-readobj -V %t | FileCheck %s +# RUN: llvm-readobj -V %t1 | FileCheck %s # CHECK: VersionDefinitions [ # CHECK-NEXT: Definition { From cdfecb82ee27fabf927102a356acd298ddca8703 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Tue, 26 Nov 2019 14:17:06 +0100 Subject: [PATCH 036/591] [lldb][NFC] Remove no longer unused variable in DWARFASTParserClang::ParseTypeFromDWARF --- lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 78c5af4821910..89331f7aca6ce 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -463,7 +463,6 @@ TypeSP DWARFASTParserClang::ParseTypeFromDWARF(const SymbolContext &sc, const dw_tag_t tag = die.Tag(); - CompilerType clang_type; TypeSP type_sp; switch (tag) { From 9b06897009dc32313354559c969d6cf0a564ec06 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Mon, 25 Nov 2019 14:33:25 +0100 Subject: [PATCH 037/591] [lldb/symbolvendorelf] Copy more sections from separate debug files Include the fancier DWARF5 sections too. 
--- .../SymbolVendor/ELF/SymbolVendorELF.cpp | 19 +++-- .../Shell/ObjectFile/ELF/build-id-case.yaml | 74 ++++++++++++++++++- 2 files changed, 83 insertions(+), 10 deletions(-) diff --git a/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp b/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp index e61e5763fabb9..d4d7a8937c127 100644 --- a/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp +++ b/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp @@ -119,14 +119,17 @@ SymbolVendorELF::CreateInstance(const lldb::ModuleSP &module_sp, SectionList *objfile_section_list = dsym_objfile_sp->GetSectionList(); static const SectionType g_sections[] = { - eSectionTypeDWARFDebugAbbrev, eSectionTypeDWARFDebugAddr, - eSectionTypeDWARFDebugAranges, eSectionTypeDWARFDebugCuIndex, - eSectionTypeDWARFDebugFrame, eSectionTypeDWARFDebugInfo, - eSectionTypeDWARFDebugLine, eSectionTypeDWARFDebugLoc, - eSectionTypeDWARFDebugMacInfo, eSectionTypeDWARFDebugPubNames, - eSectionTypeDWARFDebugPubTypes, eSectionTypeDWARFDebugRanges, - eSectionTypeDWARFDebugStr, eSectionTypeDWARFDebugStrOffsets, - eSectionTypeELFSymbolTable, eSectionTypeDWARFGNUDebugAltLink, + eSectionTypeDWARFDebugAbbrev, eSectionTypeDWARFDebugAddr, + eSectionTypeDWARFDebugAranges, eSectionTypeDWARFDebugCuIndex, + eSectionTypeDWARFDebugFrame, eSectionTypeDWARFDebugInfo, + eSectionTypeDWARFDebugLine, eSectionTypeDWARFDebugLineStr, + eSectionTypeDWARFDebugLoc, eSectionTypeDWARFDebugLocLists, + eSectionTypeDWARFDebugMacInfo, eSectionTypeDWARFDebugMacro, + eSectionTypeDWARFDebugNames, eSectionTypeDWARFDebugPubNames, + eSectionTypeDWARFDebugPubTypes, eSectionTypeDWARFDebugRanges, + eSectionTypeDWARFDebugRngLists, eSectionTypeDWARFDebugStr, + eSectionTypeDWARFDebugStrOffsets, eSectionTypeDWARFDebugTypes, + eSectionTypeELFSymbolTable, eSectionTypeDWARFGNUDebugAltLink, }; for (SectionType section_type : g_sections) { if (SectionSP section_sp = diff --git a/lldb/test/Shell/ObjectFile/ELF/build-id-case.yaml 
b/lldb/test/Shell/ObjectFile/ELF/build-id-case.yaml index f9786b3754f84..08366056947bf 100644 --- a/lldb/test/Shell/ObjectFile/ELF/build-id-case.yaml +++ b/lldb/test/Shell/ObjectFile/ELF/build-id-case.yaml @@ -4,8 +4,25 @@ # RUN: llvm-objcopy --strip-all %t/.build-id/1b/8a73ac238390e32a7ff4ac8ebe4d6a41ecf5c9.debug %t/stripped.out # RUN: lldb-test object-file %t/stripped.out | FileCheck %s +# CHECK: Name: .debug_abbrev +# CHECK: Name: .debug_addr +# CHECK: Name: .debug_aranges # CHECK: Name: .debug_frame -# CHECK-NEXT: Type: dwarf-frame +# CHECK: Name: .debug_info +# CHECK: Name: .debug_line +# CHECK: Name: .debug_line_str +# CHECK: Name: .debug_loc +# CHECK: Name: .debug_loclists +# CHECK: Name: .debug_macinfo +# CHECK: Name: .debug_macro +# CHECK: Name: .debug_names +# CHECK: Name: .debug_pubnames +# CHECK: Name: .debug_pubtypes +# CHECK: Name: .debug_ranges +# CHECK: Name: .debug_rnglists +# CHECK: Name: .debug_str +# CHECK: Name: .debug_str_offsets +# CHECK: Name: .debug_types --- !ELF FileHeader: @@ -27,9 +44,62 @@ Sections: Address: 0x00000000004003D0 AddressAlign: 0x0000000000000010 Content: DEADBEEFBAADF00D + - Name: .debug_abbrev + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_addr + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_aranges + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D - Name: .debug_frame Type: SHT_PROGBITS - AddressAlign: 0x0000000000000008 + Content: DEADBEEFBAADF00D + - Name: .debug_info + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_line + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_line_str + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_loc + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_loclists + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_macinfo + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_macro + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_names + Type: 
SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_pubnames + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_pubtypes + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_ranges + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_rnglists + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_str + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_str_offsets + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_types + Type: SHT_PROGBITS Content: DEADBEEFBAADF00D Symbols: - Name: main From 957d9a0335b8199b01caec56574e72154c3a1226 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Thu, 14 Nov 2019 15:31:26 +0100 Subject: [PATCH 038/591] [lldb] remove unsigned Stream::operator<< overloads Summary: I recently re-discovered that the unsinged stream operators of the lldb_private::Stream class have a surprising behavior in that they print the number in hex. This is all the more confusing because the "signed" versions of those operators behave normally. Now that, thanks to Raphael, each Stream class has a llvm::raw_ostream wrapper, I think we should delete most of our formatting capabilities and just delegate to that. This patch tests the water by just deleting the operators with the most surprising behavior. Most of the code using these operators was printing user_id_t values. It wasn't fully consistent about prefixing them with "0x", but I've tried to consistenly print it without that prefix, to make it more obviously different from pointer values. 
Reviewers: teemperor, JDevlieghere, jdoerfert Subscribers: lldb-commits Tags: #lldb Differential Revision: https://reviews.llvm.org/D70241 --- lldb/include/lldb/Utility/Stream.h | 43 ++----------------- lldb/source/Expression/DWARFExpression.cpp | 2 +- lldb/source/Symbol/SymbolContext.cpp | 16 +++---- lldb/source/Symbol/Type.cpp | 2 +- lldb/source/Symbol/Variable.cpp | 2 +- lldb/source/Utility/Stream.cpp | 24 ----------- .../test/Shell/SymbolFile/DWARF/array-sizes.s | 2 +- lldb/unittests/Utility/StreamTest.cpp | 9 ---- 8 files changed, 15 insertions(+), 85 deletions(-) diff --git a/lldb/include/lldb/Utility/Stream.h b/lldb/include/lldb/Utility/Stream.h index 414f921773030..88cdb88d77adf 100644 --- a/lldb/include/lldb/Utility/Stream.h +++ b/lldb/include/lldb/Utility/Stream.h @@ -213,45 +213,10 @@ class Stream { /// in one statement. Stream &operator<<(char ch); - /// Output a uint8_t \a uval to the stream \a s. - /// - /// \param[in] uval - /// A uint8_t value. - /// - /// \return - /// A reference to this class so multiple things can be streamed - /// in one statement. - Stream &operator<<(uint8_t uval); - - /// Output a uint16_t \a uval to the stream \a s. - /// - /// \param[in] uval - /// A uint16_t value. - /// - /// \return - /// A reference to this class so multiple things can be streamed - /// in one statement. - Stream &operator<<(uint16_t uval); - - /// Output a uint32_t \a uval to the stream \a s. - /// - /// \param[in] uval - /// A uint32_t value. - /// - /// \return - /// A reference to this class so multiple things can be streamed - /// in one statement. - Stream &operator<<(uint32_t uval); - - /// Output a uint64_t \a uval to the stream \a s. - /// - /// \param[in] uval - /// A uint64_t value. - /// - /// \return - /// A reference to this class so multiple things can be streamed - /// in one statement. 
- Stream &operator<<(uint64_t uval); + Stream &operator<<(uint8_t uval) = delete; + Stream &operator<<(uint16_t uval) = delete; + Stream &operator<<(uint32_t uval) = delete; + Stream &operator<<(uint64_t uval) = delete; /// Output a int8_t \a sval to the stream \a s. /// diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp index a063da0f4e401..8947500959cbd 100644 --- a/lldb/source/Expression/DWARFExpression.cpp +++ b/lldb/source/Expression/DWARFExpression.cpp @@ -146,7 +146,7 @@ void DWARFExpression::GetDescription(Stream *s, lldb::DescriptionLevel level, // We have a new base address if (count > 0) s->PutCString(", "); - *s << "base_addr = " << end_addr_offset; + s->Format("base_addr = {0:x}", end_addr_offset); } } diff --git a/lldb/source/Symbol/SymbolContext.cpp b/lldb/source/Symbol/SymbolContext.cpp index 7828ca613359d..c5d8547b08c8b 100644 --- a/lldb/source/Symbol/SymbolContext.cpp +++ b/lldb/source/Symbol/SymbolContext.cpp @@ -315,14 +315,14 @@ void SymbolContext::Dump(Stream *s, Target *target) const { s->Indent(); *s << "CompileUnit = " << comp_unit; if (comp_unit != nullptr) - *s << " {0x" << comp_unit->GetID() << "} " - << *(static_cast(comp_unit)); + s->Format(" {{{0:x-16}} {1}", comp_unit->GetID(), + *static_cast(comp_unit)); s->EOL(); s->Indent(); *s << "Function = " << function; if (function != nullptr) { - *s << " {0x" << function->GetID() << "} " << function->GetType()->GetName() - << ", address-range = "; + s->Format(" {{{0:x-16}} {1}, address-range = ", function->GetID(), + function->GetType()->GetName()); function->GetAddressRange().Dump(s, target, Address::DumpStyleLoadAddress, Address::DumpStyleModuleWithFileAddress); s->EOL(); @@ -337,10 +337,7 @@ void SymbolContext::Dump(Stream *s, Target *target) const { s->Indent(); *s << "Block = " << block; if (block != nullptr) - *s << " {0x" << block->GetID() << '}'; - // Dump the block and pass it a negative depth to we print all the parent - // blocks 
if (block != NULL) - // block->Dump(s, function->GetFileAddress(), INT_MIN); + s->Format(" {{{0:x-16}}", block->GetID()); s->EOL(); s->Indent(); *s << "LineEntry = "; @@ -354,7 +351,8 @@ void SymbolContext::Dump(Stream *s, Target *target) const { s->EOL(); *s << "Variable = " << variable; if (variable != nullptr) { - *s << " {0x" << variable->GetID() << "} " << variable->GetType()->GetName(); + s->Format(" {{{0:x-16}} {1}", variable->GetID(), + variable->GetType()->GetName()); s->EOL(); } s->IndentLess(); diff --git a/lldb/source/Symbol/Type.cpp b/lldb/source/Symbol/Type.cpp index c3e5c03709517..6465ce3dd156f 100644 --- a/lldb/source/Symbol/Type.cpp +++ b/lldb/source/Symbol/Type.cpp @@ -255,7 +255,7 @@ void Type::Dump(Stream *s, bool show_context) { *s << ", compiler_type = " << m_compiler_type.GetOpaqueQualType() << ' '; GetForwardCompilerType().DumpTypeDescription(s); } else if (m_encoding_uid != LLDB_INVALID_UID) { - *s << ", type_data = " << (uint64_t)m_encoding_uid; + s->Format(", type_data = {0:x-16}", m_encoding_uid); switch (m_encoding_uid_type) { case eEncodingInvalid: break; diff --git a/lldb/source/Symbol/Variable.cpp b/lldb/source/Symbol/Variable.cpp index 427dbf459c4eb..a2eeaa1d2a5b2 100644 --- a/lldb/source/Symbol/Variable.cpp +++ b/lldb/source/Symbol/Variable.cpp @@ -112,7 +112,7 @@ void Variable::Dump(Stream *s, bool show_context) const { if (m_symfile_type_sp) { Type *type = m_symfile_type_sp->GetType(); if (type) { - *s << ", type = {" << type->GetID() << "} " << (void *)type << " ("; + s->Format(", type = {{{0:x-16}} {1} (", type->GetID(), type); type->DumpTypeName(s); s->PutChar(')'); } diff --git a/lldb/source/Utility/Stream.cpp b/lldb/source/Utility/Stream.cpp index c48a12acd9064..991f7e924d8dd 100644 --- a/lldb/source/Utility/Stream.cpp +++ b/lldb/source/Utility/Stream.cpp @@ -160,30 +160,6 @@ Stream &Stream::operator<<(const void *p) { return *this; } -// Stream a uint8_t "uval" out to this stream. 
-Stream &Stream::operator<<(uint8_t uval) { - PutHex8(uval); - return *this; -} - -// Stream a uint16_t "uval" out to this stream. -Stream &Stream::operator<<(uint16_t uval) { - PutHex16(uval, m_byte_order); - return *this; -} - -// Stream a uint32_t "uval" out to this stream. -Stream &Stream::operator<<(uint32_t uval) { - PutHex32(uval, m_byte_order); - return *this; -} - -// Stream a uint64_t "uval" out to this stream. -Stream &Stream::operator<<(uint64_t uval) { - PutHex64(uval, m_byte_order); - return *this; -} - // Stream a int8_t "sval" out to this stream. Stream &Stream::operator<<(int8_t sval) { Printf("%i", static_cast(sval)); diff --git a/lldb/test/Shell/SymbolFile/DWARF/array-sizes.s b/lldb/test/Shell/SymbolFile/DWARF/array-sizes.s index f00fe2ad005d2..b810527b5535e 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/array-sizes.s +++ b/lldb/test/Shell/SymbolFile/DWARF/array-sizes.s @@ -10,7 +10,7 @@ # RUN: lldb-test symbols %t | FileCheck %s # CHECK: Variable{0x7fffffff0000001e}, name = "X" -# CHECK-SAME: type = {7fffffff00000033} 0x{{[0-9a-f]*}} (char [56]) +# CHECK-SAME: type = {7fffffff00000033} 0x{{[0-9A-F]*}} (char [56]) # Generated from "char X[47];" diff --git a/lldb/unittests/Utility/StreamTest.cpp b/lldb/unittests/Utility/StreamTest.cpp index 2e2bcb344fcdf..40a1601b976e1 100644 --- a/lldb/unittests/Utility/StreamTest.cpp +++ b/lldb/unittests/Utility/StreamTest.cpp @@ -304,15 +304,6 @@ TEST_F(StreamTest, ShiftOperatorInts) { EXPECT_EQ("127 32767 2147483647 9223372036854775807", TakeValue()); } -TEST_F(StreamTest, ShiftOperatorUInts) { - s << std::numeric_limits::max() << " "; - s << std::numeric_limits::max() << " "; - s << std::numeric_limits::max() << " "; - s << std::numeric_limits::max(); - EXPECT_EQ(33U, s.GetWrittenBytes()); - EXPECT_EQ("ff ffff ffffffff ffffffffffffffff", TakeValue()); -} - TEST_F(StreamTest, ShiftOperatorPtr) { // This test is a bit tricky because pretty much everything related to // pointer printing seems to lead to UB or IB. 
So let's make the most basic From 4eacc32672e60113b835c4356d1c398dc1e30279 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= Date: Tue, 26 Nov 2019 12:13:13 +0100 Subject: [PATCH 039/591] Partially reland "[Diagnostics] Put "deprecated copy" warnings into -Wdeprecated-copy"" But do not enable it under -Wextra until libcxx issue is solved. --- clang/include/clang/Basic/DiagnosticGroups.td | 4 ++++ .../clang/Basic/DiagnosticSemaKinds.td | 10 ++++++--- clang/lib/Sema/SemaDeclCXX.cpp | 10 ++++----- clang/test/SemaCXX/deprecated-copy.cpp | 22 +++++++++++++++++++ 4 files changed, 38 insertions(+), 8 deletions(-) create mode 100644 clang/test/SemaCXX/deprecated-copy.cpp diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 6b83bf59ea892..9f5900f5bec85 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -128,6 +128,8 @@ def CXX11CompatDeprecatedWritableStr : def DeprecatedAttributes : DiagGroup<"deprecated-attributes">; def DeprecatedCommaSubscript : DiagGroup<"deprecated-comma-subscript">; +def DeprecatedCopy : DiagGroup<"deprecated-copy">; +def DeprecatedCopyDtor : DiagGroup<"deprecated-copy-dtor">; def DeprecatedDeclarations : DiagGroup<"deprecated-declarations">; def UnavailableDeclarations : DiagGroup<"unavailable-declarations">; def UnguardedAvailabilityNew : DiagGroup<"unguarded-availability-new">; @@ -147,6 +149,8 @@ def DeprecatedWritableStr : DiagGroup<"deprecated-writable-strings", // FIXME: Why is DeprecatedImplementations not in this group? 
def Deprecated : DiagGroup<"deprecated", [DeprecatedAttributes, DeprecatedCommaSubscript, + DeprecatedCopy, + DeprecatedCopyDtor, DeprecatedDeclarations, DeprecatedDynamicExceptionSpec, DeprecatedIncrementBool, diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 799b3ed2ea920..c19862addec91 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -551,9 +551,13 @@ def err_access_decl : Error< "use using declarations instead">; def warn_deprecated_copy_operation : Warning< "definition of implicit copy %select{constructor|assignment operator}1 " - "for %0 is deprecated because it has a user-declared " - "%select{copy %select{assignment operator|constructor}1|destructor}2">, - InGroup, DefaultIgnore; + "for %0 is deprecated because it has a user-declared copy " + "%select{assignment operator|constructor}1">, + InGroup, DefaultIgnore; +def warn_deprecated_copy_dtor_operation : Warning< + "definition of implicit copy %select{constructor|assignment operator}1 " + "for %0 is deprecated because it has a user-declared destructor">, + InGroup, DefaultIgnore; def warn_cxx17_compat_exception_spec_in_signature : Warning< "mangled name of %0 will change in C++17 due to non-throwing exception " "specification in function signature">, InGroup; diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index e3ea9788ad5db..f469580dd8661 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -12406,8 +12406,7 @@ static void diagnoseDeprecatedCopyOperation(Sema &S, CXXMethodDecl *CopyOp) { // In Microsoft mode, assignment operations don't affect constructors and // vice versa. 
- if (RD->hasUserDeclaredDestructor() && - RD->getDestructor()->isUserProvided()) { + if (RD->hasUserDeclaredDestructor()) { UserDeclaredOperation = RD->getDestructor(); } else if (!isa(CopyOp) && RD->hasUserDeclaredCopyConstructor() && @@ -12435,9 +12434,10 @@ static void diagnoseDeprecatedCopyOperation(Sema &S, CXXMethodDecl *CopyOp) { if (UserDeclaredOperation && UserDeclaredOperation->isUserProvided()) { S.Diag(UserDeclaredOperation->getLocation(), - diag::warn_deprecated_copy_operation) - << RD << /*copy assignment*/!isa(CopyOp) - << /*destructor*/isa(UserDeclaredOperation); + isa(UserDeclaredOperation) + ? diag::warn_deprecated_copy_dtor_operation + : diag::warn_deprecated_copy_operation) + << RD << /*copy assignment*/ !isa(CopyOp); } } diff --git a/clang/test/SemaCXX/deprecated-copy.cpp b/clang/test/SemaCXX/deprecated-copy.cpp new file mode 100644 index 0000000000000..c2ab3c40bbae6 --- /dev/null +++ b/clang/test/SemaCXX/deprecated-copy.cpp @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -std=c++11 %s -Wdeprecated-copy -verify +// RUN: %clang_cc1 -std=c++11 %s -Wdeprecated-copy-dtor -DDEPRECATED_COPY_DTOR -verify + +#ifdef DEPRECATED_COPY_DTOR +struct A { + int *ptr; + ~A() { delete ptr; } // expected-warning {{definition of implicit copy constructor for 'A' is deprecated because it has a user-declared destructor}} +}; + +void foo() { + A a{}; + A b = a; // expected-note {{implicit copy constructor for 'A' first required here}} +} +#else +struct B { + B &operator=(const B &); // expected-warning {{definition of implicit copy constructor for 'B' is deprecated because it has a user-declared copy assignment operator}} +}; + +void bar() { + B b1, b2(b1); // expected-note {{implicit copy constructor for 'B' first required here}} +} +#endif From 6612fabc47f16d265d169b5b73dc8070dde436a8 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 26 Nov 2019 14:47:28 +0100 Subject: [PATCH 040/591] [lldb] remove a superfluous semicolon --- 
.../Plugins/LanguageRuntime/CPlusPlus/CPPLanguageRuntime.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Plugins/LanguageRuntime/CPlusPlus/CPPLanguageRuntime.cpp b/lldb/source/Plugins/LanguageRuntime/CPlusPlus/CPPLanguageRuntime.cpp index b4d8ba2218a17..d556aae1c458c 100644 --- a/lldb/source/Plugins/LanguageRuntime/CPlusPlus/CPPLanguageRuntime.cpp +++ b/lldb/source/Plugins/LanguageRuntime/CPlusPlus/CPPLanguageRuntime.cpp @@ -62,7 +62,7 @@ bool CPPLanguageRuntime::GetObjectDescription( bool contains_lambda_identifier(llvm::StringRef &str_ref) { return str_ref.contains("$_") || str_ref.contains("'lambda'"); -}; +} CPPLanguageRuntime::LibCppStdFunctionCallableInfo line_entry_helper(Target &target, const SymbolContext &sc, Symbol *symbol, From 12284e54b483c2541eb09cd7c575f438970b66c1 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 26 Nov 2019 14:48:47 +0100 Subject: [PATCH 041/591] [lldb] fix a -Wcast-qual warning --- lldb/tools/driver/Driver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/tools/driver/Driver.cpp b/lldb/tools/driver/Driver.cpp index fe4a17762f8bc..e13507be22da2 100644 --- a/lldb/tools/driver/Driver.cpp +++ b/lldb/tools/driver/Driver.cpp @@ -854,7 +854,7 @@ int main(int argc, char const *argv[]) { } // Register the reproducer signal handler. - llvm::sys::AddSignalHandler(reproducer_handler, (void *)(argv[0])); + llvm::sys::AddSignalHandler(reproducer_handler, const_cast(argv[0])); SBError error = SBDebugger::InitializeWithErrorHandling(); if (error.Fail()) { From 64225aea8f9ea92bd4189acceead272f498981b8 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Tue, 26 Nov 2019 15:29:30 +0300 Subject: [PATCH 042/591] [llvm-readobj][test] - Cleanup the many-sections.s test case. It removes 2 precompiled binaries used which are now can be crafted with the use of yaml2obj. 
Differential revision: https://reviews.llvm.org/D70711 --- .../Inputs/many-sections-stripped.elf-x86_64 | Bin 504 -> 0 bytes .../Inputs/many-sections.elf-x86_64 | Bin 504 -> 0 bytes llvm/test/tools/llvm-readobj/many-sections.s | 74 +++++++++++------- 3 files changed, 47 insertions(+), 27 deletions(-) delete mode 100644 llvm/test/tools/llvm-readobj/Inputs/many-sections-stripped.elf-x86_64 delete mode 100644 llvm/test/tools/llvm-readobj/Inputs/many-sections.elf-x86_64 diff --git a/llvm/test/tools/llvm-readobj/Inputs/many-sections-stripped.elf-x86_64 b/llvm/test/tools/llvm-readobj/Inputs/many-sections-stripped.elf-x86_64 deleted file mode 100644 index a589dc5d6a9c5ed4ea6c4d2d1e04179ebc4f43c0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 504 zcmb<-^>JfjWMqH=Mg}_u1P>;PN;`nLAoBlzFcSyCjKw67*#cO^8T3k0D@qvjiYs$V z5|e;*MsZ0Igax4);)_cXi%M`0b#|1hWsO2VFk^I$RW4 diff --git a/llvm/test/tools/llvm-readobj/Inputs/many-sections.elf-x86_64 b/llvm/test/tools/llvm-readobj/Inputs/many-sections.elf-x86_64 deleted file mode 100644 index 1abb98a01b845630a8b8f5b20410da22bda9f330..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 504 zcmb<-^>JfjWMqH=Mg}_u1P><4z_0_sWN-kpLFE7cU?vWN8H-6Evjwn-Gw7A1R+KR4 z6<6k#BqjmrjN+0a2n#|p#21$&7M0+z3PYF`D#DB*0}%kZ1BBVorXFfI#>0j1f2 zG!vQ_i0c3q^MKMIeIRulP`(6|<_FT)+|~h=SA^0q3glK~APMAP)4vj`2xcEl54wH; DhwBv) diff --git a/llvm/test/tools/llvm-readobj/many-sections.s b/llvm/test/tools/llvm-readobj/many-sections.s index b2050c74eec24..ae7ce34706cad 100644 --- a/llvm/test/tools/llvm-readobj/many-sections.s +++ b/llvm/test/tools/llvm-readobj/many-sections.s @@ -1,37 +1,57 @@ -## many-sections.elf-x86_64 is a file that was generated to simulate -## an object with more than ~65k sections. When an ELF object -## has SHN_LORESERVE (0xff00) or more sections, its e_shnum field +## Here we simulate an object with more than ~65k sections and check how we dump it. 
+## When an ELF object has SHN_LORESERVE (0xff00) or more sections, its e_shnum field ## should be zero and sh_size of the section header at index 0 is used ## to store the value. If the section name string table section index is ## greater than or equal to SHN_LORESERVE, then e_shstrndx field ## should have the value of SHN_XINDEX and sh_link of the section header ## at index 0 is used to store the value. -## -## many-sections.elf-x86_64 has few sections to save disk -## space, but its e_shnum, e_shstrndx, sh_size and sh_link fields are set -## according to the above description, so that we can test the dumper. - -# RUN: llvm-readelf --file-headers -S %p/Inputs/many-sections.elf-x86_64 | \ -# RUN: FileCheck %s --check-prefix=GNU1 -# GNU1: Number of section headers: 0 (5) -# GNU1: Section header string table index: 65535 (3) -# GNU1: There are 5 section headers, starting at offset 0xb8 - -# RUN: llvm-readobj --file-headers %p/Inputs/many-sections.elf-x86_64 | \ -# RUN: FileCheck %s --check-prefix=LLVM1 -# LLVM1: SectionHeaderCount: 0 (5) -# LLVM1: StringTableSectionIndex: 65535 (3) - -## many-sections-stripped.elf-x86_64 is many-sections.elf-x86_64 with -## e_shoff field set to zero, but not e_shstrndx, to show that -## this corrupt case is handled correctly. 
- -# RUN: llvm-readelf --file-headers %p/Inputs/many-sections-stripped.elf-x86_64 | \ -# RUN: FileCheck %s --check-prefix=GNU2 + +# RUN: yaml2obj --docnum=1 %s -o %t1 +# RUN: llvm-readelf --file-headers -S %t1 | FileCheck %s --check-prefix=GNU1 +# GNU1: Number of section headers: 0 (3) +# GNU1: Section header string table index: 65535 (2) +# GNU1: There are 3 section headers, starting at offset 0x58 + +# RUN: llvm-readobj --file-headers %t1 | FileCheck %s --check-prefix=LLVM1 +# LLVM1: SectionHeaderCount: 0 (3) +# LLVM1: StringTableSectionIndex: 65535 (2) + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 + SHNum: 0 + SHStrNdx: 0xffff ## SHN_XINDEX +Sections: + - Type: SHT_NULL + Link: .shstrtab + Size: 0x3 + +## Another case is when e_shoff field set to zero, but not e_shstrndx. +## We want to show that this corrupt case is handled correctly. + +# RUN: yaml2obj --docnum=2 %s -o %t2 + +# RUN: llvm-readelf --file-headers %t2 | FileCheck %s --check-prefix=GNU2 # GNU2: Number of section headers: 0 # GNU2: Section header string table index: 65535 (corrupt: out of range) -# RUN: llvm-readobj --file-headers %p/Inputs/many-sections-stripped.elf-x86_64 | \ -# RUN: FileCheck %s --check-prefix=LLVM2 +# RUN: llvm-readobj --file-headers %t2 | FileCheck %s --check-prefix=LLVM2 # LLVM2: SectionHeaderCount: 0 # LLVM2: StringTableSectionIndex: 65535 (corrupt: out of range) + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 + SHOff: 0 + SHNum: 0 + SHStrNdx: 0xffff ## SHN_XINDEX +Sections: + - Type: SHT_NULL + Link: .shstrtab + Size: 0x3 From 290e43ddb660db2e9b294e1e963222c08965d211 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 26 Nov 2019 15:00:15 +0100 Subject: [PATCH 043/591] [lldb] Use llvm::format in AppleObjCRuntimeV2.cpp Crushing a "sprintf" buffer is null warning. 
--- .../ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp index 9bdbef393e39f..750b6ce6b0c6a 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp @@ -1625,19 +1625,13 @@ AppleObjCRuntimeV2::UpdateISAToDescriptorMapSharedCache() { // Substitute in the correct class_getName / class_getNameRaw function name, // concatenate the two parts of our expression text. The format string // has two %s's, so provide the name twice. - int prefix_string_size = snprintf (nullptr, 0, + std::string shared_class_expression; + llvm::raw_string_ostream(shared_class_expression) << llvm::format( g_shared_cache_class_name_funcptr, class_name_getter_function_name.AsCString(), class_name_getter_function_name.AsCString()); - char *class_name_func_ptr_expr = (char*) malloc (prefix_string_size + 1); - snprintf (class_name_func_ptr_expr, prefix_string_size + 1, - g_shared_cache_class_name_funcptr, - class_name_getter_function_name.AsCString(), - class_name_getter_function_name.AsCString()); - std::string shared_class_expression = class_name_func_ptr_expr; shared_class_expression += g_get_shared_cache_class_info_body; - free (class_name_func_ptr_expr); m_get_shared_cache_class_info_code.reset( GetTargetRef().GetUtilityFunctionForLanguage( From 16144d2b21d90a0515be2fc9158cbaf828abd980 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Tue, 26 Nov 2019 14:23:32 +0100 Subject: [PATCH 044/591] [lldb][NFC] Modernize string handling in DWARFASTParserClang::ParseTypeModifier --- .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 38 ++++++++----------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git 
a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 89331f7aca6ce..fe6ab3064447e 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -662,11 +662,7 @@ DWARFASTParserClang::ParseTypeModifier(const SymbolContext &sc, if (cu_language == eLanguageTypeObjC || cu_language == eLanguageTypeObjC_plus_plus) { if (attrs.name) { - static ConstString g_objc_type_name_id("id"); - static ConstString g_objc_type_name_Class("Class"); - static ConstString g_objc_type_name_selector("SEL"); - - if (attrs.name == g_objc_type_name_id) { + if (attrs.name == "id") { if (log) dwarf->GetObjectFile()->GetModule()->LogMessage( log, @@ -677,8 +673,7 @@ DWARFASTParserClang::ParseTypeModifier(const SymbolContext &sc, encoding_data_type = Type::eEncodingIsUID; attrs.type.Clear(); resolve_state = Type::ResolveState::Full; - - } else if (attrs.name == g_objc_type_name_Class) { + } else if (attrs.name == "Class") { if (log) dwarf->GetObjectFile()->GetModule()->LogMessage( log, @@ -689,7 +684,7 @@ DWARFASTParserClang::ParseTypeModifier(const SymbolContext &sc, encoding_data_type = Type::eEncodingIsUID; attrs.type.Clear(); resolve_state = Type::ResolveState::Full; - } else if (attrs.name == g_objc_type_name_selector) { + } else if (attrs.name == "SEL") { if (log) dwarf->GetObjectFile()->GetModule()->LogMessage( log, @@ -709,20 +704,19 @@ DWARFASTParserClang::ParseTypeModifier(const SymbolContext &sc, const DWARFDIE encoding_die = attrs.type.Reference(); if (encoding_die && encoding_die.Tag() == DW_TAG_structure_type) { - if (const char *struct_name = encoding_die.GetName()) { - if (!strcmp(struct_name, "objc_object")) { - if (log) - dwarf->GetObjectFile()->GetModule()->LogMessage( - log, - "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s " - "'%s' is 'objc_object*', which we overrode to " - "'id'.", - die.GetOffset(), 
die.GetTagAsCString(), die.GetName()); - clang_type = m_ast.GetBasicType(eBasicTypeObjCID); - encoding_data_type = Type::eEncodingIsUID; - attrs.type.Clear(); - resolve_state = Type::ResolveState::Full; - } + llvm::StringRef struct_name = encoding_die.GetName(); + if (struct_name == "objc_object") { + if (log) + dwarf->GetObjectFile()->GetModule()->LogMessage( + log, + "SymbolFileDWARF::ParseType (die = 0x%8.8x) %s " + "'%s' is 'objc_object*', which we overrode to " + "'id'.", + die.GetOffset(), die.GetTagAsCString(), die.GetName()); + clang_type = m_ast.GetBasicType(eBasicTypeObjCID); + encoding_data_type = Type::eEncodingIsUID; + attrs.type.Clear(); + resolve_state = Type::ResolveState::Full; } } } From 2bd252ea894189f77e09755cf6951727e1d03a74 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 26 Nov 2019 09:07:17 -0500 Subject: [PATCH 045/591] [InferFuncAttributes][Attributor] add tests for 'dereferenceable'; NFC Pulling a couple of extra tests out of D64258 before abandoning in favor of D70714 --- .../InferFunctionAttrs/dereferenceable.ll | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/llvm/test/Transforms/InferFunctionAttrs/dereferenceable.ll b/llvm/test/Transforms/InferFunctionAttrs/dereferenceable.ll index 322d85aba9c00..bf7daba40d430 100644 --- a/llvm/test/Transforms/InferFunctionAttrs/dereferenceable.ll +++ b/llvm/test/Transforms/InferFunctionAttrs/dereferenceable.ll @@ -161,6 +161,19 @@ define void @volatile_is_not_dereferenceable(i16* %ptr) { ret void } +; TODO: We should allow inference for atomic (but not volatile) ops. 
+ +define void @atomic_is_alright(i16* %ptr) { +; CHECK-LABEL: @atomic_is_alright(i16* %ptr) + %arrayidx0 = getelementptr i16, i16* %ptr, i64 0 + %arrayidx1 = getelementptr i16, i16* %ptr, i64 1 + %arrayidx2 = getelementptr i16, i16* %ptr, i64 2 + %t0 = load atomic i16, i16* %arrayidx0 unordered, align 2 + %t1 = load i16, i16* %arrayidx1 + %t2 = load i16, i16* %arrayidx2 + ret void +} + declare void @may_not_return() define void @not_guaranteed_to_transfer_execution(i16* %ptr) { @@ -244,6 +257,21 @@ define void @more_bytes(i32* dereferenceable(8) %ptr) { ret void } +; Improve on existing dereferenceable_or_null attribute. + +define void @more_bytes_and_not_null(i32* dereferenceable_or_null(8) %ptr) { +; CHECK-LABEL: @more_bytes_and_not_null(i32* dereferenceable_or_null(8) %ptr) + %arrayidx3 = getelementptr i32, i32* %ptr, i64 3 + %arrayidx1 = getelementptr i32, i32* %ptr, i64 1 + %arrayidx0 = getelementptr i32, i32* %ptr, i64 0 + %arrayidx2 = getelementptr i32, i32* %ptr, i64 2 + %t3 = load i32, i32* %arrayidx3 + %t1 = load i32, i32* %arrayidx1 + %t2 = load i32, i32* %arrayidx2 + %t0 = load i32, i32* %arrayidx0 + ret void +} + ; But don't pessimize existing dereferenceable attribute. define void @better_bytes(i32* dereferenceable(100) %ptr) { From 5871cba86172c5bd947952a9441acf80332455ea Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 26 Nov 2019 15:11:16 +0100 Subject: [PATCH 046/591] [lldb] Avoid snprintf in PlatformRemoteDarwinDevice This quashes a -Wformat-truncation warning. 
--- .../MacOSX/PlatformRemoteDarwinDevice.cpp | 33 ++++++++----------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp index e9bb29293189d..0aa129c808d43 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp @@ -449,12 +449,10 @@ Status PlatformRemoteDarwinDevice::GetSymbolFile(const FileSpec &platform_file, Status error; char platform_file_path[PATH_MAX]; if (platform_file.GetPath(platform_file_path, sizeof(platform_file_path))) { - char resolved_path[PATH_MAX]; - const char *os_version_dir = GetDeviceSupportDirectoryForOSVersion(); if (os_version_dir) { - ::snprintf(resolved_path, sizeof(resolved_path), "%s/%s", os_version_dir, - platform_file_path); + std::string resolved_path = + (llvm::Twine(os_version_dir) + "/" + platform_file_path).str(); local_file.SetFile(resolved_path, FileSpec::Style::native); FileSystem::Instance().Resolve(local_file); @@ -466,31 +464,28 @@ Status PlatformRemoteDarwinDevice::GetSymbolFile(const FileSpec &platform_file, return error; } - ::snprintf(resolved_path, sizeof(resolved_path), "%s/Symbols.Internal/%s", - os_version_dir, platform_file_path); + resolved_path = (llvm::Twine(os_version_dir) + "/Symbols.Internal/" + + platform_file_path) + .str(); local_file.SetFile(resolved_path, FileSpec::Style::native); FileSystem::Instance().Resolve(local_file); if (FileSystem::Instance().Exists(local_file)) { - if (log) { - LLDB_LOGF( - log, - "Found a copy of %s in the DeviceSupport dir %s/Symbols.Internal", - platform_file_path, os_version_dir); - } + LLDB_LOGF( + log, + "Found a copy of %s in the DeviceSupport dir %s/Symbols.Internal", + platform_file_path, os_version_dir); return error; } - ::snprintf(resolved_path, sizeof(resolved_path), "%s/Symbols/%s", - os_version_dir, platform_file_path); + 
resolved_path = + (llvm::Twine(os_version_dir) + "/Symbols/" + platform_file_path) + .str(); local_file.SetFile(resolved_path, FileSpec::Style::native); FileSystem::Instance().Resolve(local_file); if (FileSystem::Instance().Exists(local_file)) { - if (log) { - LLDB_LOGF(log, - "Found a copy of %s in the DeviceSupport dir %s/Symbols", - platform_file_path, os_version_dir); - } + LLDB_LOGF(log, "Found a copy of %s in the DeviceSupport dir %s/Symbols", + platform_file_path, os_version_dir); return error; } } From d88f67bdca7526c2adc3749bb2cfef6b32da971c Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Wed, 20 Nov 2019 17:37:56 +0300 Subject: [PATCH 047/591] [llvm-readobj/llvm-readelf] - Reimplement dumping of the SHT_GNU_verdef section. Currently we have following issues: 1) We have 2 different implementations with a different behaviors for GNU/LLVM styles. 2) Errors are either not handled at all or we call report_fatal_error with not helpfull messages. 3) There is no test coverage even for those errors that are reported. This patch reimplements parsing of the SHT_GNU_verdef section entries in a single place, adds a few error messages and test coverage. 
Differential revision: https://reviews.llvm.org/D70495 --- .../tools/llvm-readobj/elf-versioninfo.test | 244 +++++++++++++++++- .../tools/yaml2obj/ELF/verdef-section.yaml | 2 +- llvm/tools/llvm-readobj/ELFDumper.cpp | 217 +++++++++++----- 3 files changed, 390 insertions(+), 73 deletions(-) diff --git a/llvm/test/tools/llvm-readobj/elf-versioninfo.test b/llvm/test/tools/llvm-readobj/elf-versioninfo.test index 27bf302edf9c6..c732f7d2a2b6c 100644 --- a/llvm/test/tools/llvm-readobj/elf-versioninfo.test +++ b/llvm/test/tools/llvm-readobj/elf-versioninfo.test @@ -63,6 +63,7 @@ Sections: Names: - VERSION2 - VERSION1 + - VERSION3 - Name: .gnu.version_r Type: SHT_GNU_verneed Flags: [ SHF_ALLOC ] @@ -193,7 +194,7 @@ DynamicSymbols: # LLVM-NEXT: Index: 3 # LLVM-NEXT: Hash: 175630258 # LLVM-NEXT: Name: VERSION2 -# LLVM-NEXT: Predecessor: VERSION1 +# LLVM-NEXT: Predecessors: [VERSION1, VERSION3] # LLVM-NEXT: } # LLVM-NEXT: ] # LLVM-NEXT: VersionRequirements [ @@ -273,11 +274,12 @@ DynamicSymbols: # GNU-NEXT: 0x0038: Rev: 1 Flags: WEAK Index: 0 Cnt: 1 Name: VERSION1 # GNU-NEXT: 0x0054: Rev: 1 Flags: INFO Index: 0 Cnt: 1 Name: VERSION1 # GNU-NEXT: 0x0070: Rev: 1 Flags: BASE | WEAK | INFO Index: 2 Cnt: 1 Name: VERSION1 -# GNU-NEXT: 0x008c: Rev: 1 Flags: Index: 3 Cnt: 2 Name: VERSION2 -# GNU-NEXT: 0x00a8: Parent 1: VERSION1 +# GNU-NEXT: 0x008c: Rev: 1 Flags: Index: 3 Cnt: 3 Name: VERSION2 +# GNU-NEXT: 0x00b0: Parent 1: VERSION1 +# GNU-NEXT: 0x00b0: Parent 2: VERSION3 # GNU-EMPTY: # GNU-NEXT: Version needs section '.gnu.version_r' contains 2 entries: -# GNU-NEXT: Addr: 0000000000000000 Offset: 0x0000fc Link: 7 (.dynstr) +# GNU-NEXT: Addr: 0000000000000000 Offset: 0x000104 Link: 7 (.dynstr) # GNU-NEXT: 0x0000: Version: 1 File: verneed1.so.0 Cnt: 5 # GNU-NEXT: 0x0010: Name: v1 Flags: BASE Version: 0 # GNU-NEXT: 0x0020: Name: v1 Flags: WEAK Version: 0 @@ -286,3 +288,237 @@ DynamicSymbols: # GNU-NEXT: 0x0050: Name: v2 Flags: Version: 5 # GNU-NEXT: 0x0060: Version: 1 File: verneed2.so.0 
Cnt: 1 # GNU-NEXT: 0x0070: Name: v3 Flags: none Version: 6 + +## Check that we report a warning when sh_link references a non-existent section. + +# RUN: yaml2obj %s --docnum=2 -o %t2 +# RUN: llvm-readobj -V %t2 2>&1 | FileCheck %s --check-prefix=INVALID-LINK-LLVM -DFILE=%t2 +# RUN: not llvm-readelf -V %t2 2>&1 | FileCheck %s --check-prefix=INVALID-LINK-GNU -DFILE=%t2 + +# INVALID-LINK-LLVM: warning: '[[FILE]]': invalid section linked to SHT_GNU_verdef section with index 1: invalid section index: 255 + +## TODO: llvm-readelf should also report a meaningful warning instead of an error. +# INVALID-LINK-GNU: Version definition +# INVALID-LINK-GNU: error: '[[FILE]]': invalid section index: 255 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Link: 0xFF + Info: 0x0 + Entries: [] + +## Check that we report a warning when the sh_link field of a SHT_GNU_verdef section references a non-string table section. + +# RUN: yaml2obj %s --docnum=3 -o %t3 +# RUN: llvm-readobj -V %t3 2>&1 | FileCheck %s --check-prefix=INVALID-STRING-TABLE -DFILE=%t3 +# RUN: llvm-readelf -V %t3 2>&1 | FileCheck %s --check-prefix=INVALID-STRING-TABLE -DFILE=%t3 + +# INVALID-STRING-TABLE: warning: '[[FILE]]': invalid string table linked to SHT_GNU_verdef section with index 1: invalid sh_type for string table section [index 0]: expected SHT_STRTAB, but got SHT_NULL + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Link: 0x0 + Info: 0x0 + Entries: [] + +## Check that we report a warning when we can't read the content of the SHT_GNU_verdef section. 
+ +# RUN: yaml2obj %s --docnum=4 -o %t4 +# RUN: llvm-readobj -V %t4 2>&1 | FileCheck %s --check-prefix=INVALID-DATA -DFILE=%t4 +# RUN: llvm-readelf -V %t4 2>&1 | FileCheck %s --check-prefix=INVALID-DATA -DFILE=%t4 + +# INVALID-DATA: warning: '[[FILE]]': cannot read content of SHT_GNU_verdef section with index 1: section [index 1] has a sh_offset (0xffffffff) + sh_size (0x0) that cannot be represented + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Link: .dynstr + Info: 0x0 + Entries: [] + ShOffset: 0xFFFFFFFF +DynamicSymbols: + - Name: foo + +## Check that we report a warning when a SHT_GNU_verdef section contains a version definition +## that goes past the end of the section. + +# RUN: yaml2obj %s --docnum=5 -o %t5 +# RUN: llvm-readobj -V %t5 2>&1 | FileCheck %s --check-prefix=DEF-PAST-END -DFILE=%t5 +# RUN: llvm-readelf -V %t5 2>&1 | FileCheck %s --check-prefix=DEF-PAST-END -DFILE=%t5 + +# DEF-PAST-END: warning: '[[FILE]]': invalid SHT_GNU_verdef section with index 1: version definition 1 goes past the end of the section + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Link: .dynstr + Info: 0x1 + Entries: + - Version: 0 + Flags: 0 + VersionNdx: 0 + Hash: 0 + Names: + - FOO + ShSize: 1 +DynamicSymbols: + - Name: foo + +## Check that we report a warning when a SHT_GNU_verdef section contains a version definition +## that refers to an auxiliary entry that goes past the end of the section. 
+ +# RUN: yaml2obj %s --docnum=6 -o %t6 +# RUN: llvm-readobj -V %t6 2>&1 | FileCheck %s --check-prefix=AUX-PAST-END -DFILE=%t6 +# RUN: llvm-readelf -V %t6 2>&1 | FileCheck %s --check-prefix=AUX-PAST-END -DFILE=%t6 + +# AUX-PAST-END: warning: '[[FILE]]': invalid SHT_GNU_verdef section with index 1: version definition 1 refers to an auxiliary entry that goes past the end of the section + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Link: .dynstr + Info: 0x1 + Entries: + - Version: 0 + Flags: 0 + VersionNdx: 0 + Hash: 0 + Names: + - FOO + ShSize: 21 +DynamicSymbols: + - Name: foo + +## Check that we can dump a SHT_GNU_verdef section properly even if it contains version names strings +## that overrun the linked string table. + +# RUN: yaml2obj %s --docnum=7 -o %t7 +# RUN: llvm-readobj -V %t7 2>&1 | FileCheck %s --check-prefix=PAST-STRTAB-END-LLVM --implicit-check-not="warning:" -DFILE=%t7 +# RUN: llvm-readelf -V %t7 2>&1 | FileCheck %s --check-prefix=PAST-STRTAB-END-GNU --implicit-check-not="warning:" -DFILE=%t7 + +# PAST-STRTAB-END-LLVM: VersionDefinitions [ +# PAST-STRTAB-END-LLVM-NEXT: Definition { +# PAST-STRTAB-END-LLVM-NEXT: Version: 0 +# PAST-STRTAB-END-LLVM-NEXT: Flags [ (0x0) +# PAST-STRTAB-END-LLVM-NEXT: ] +# PAST-STRTAB-END-LLVM-NEXT: Index: 0 +# PAST-STRTAB-END-LLVM-NEXT: Hash: 0 +# PAST-STRTAB-END-LLVM-NEXT: Name: +# PAST-STRTAB-END-LLVM-NEXT: } +# PAST-STRTAB-END-LLVM-NEXT: ] + +# PAST-STRTAB-END-GNU: Version definition section '.gnu.version_d' contains 1 entries: +# PAST-STRTAB-END-GNU-NEXT: Addr: 0000000000000000 Offset: 0x000040 Link: 2 (.strtab) +# PAST-STRTAB-END-GNU-NEXT: 0x0000: Rev: 0 Flags: none Index: 0 Cnt: 1 Name: + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Link: .strtab + Info: 0x1 + Entries: + - Version: 0 + 
Flags: 0 + VersionNdx: 0 + Hash: 0 + Names: + - FOO + - Name: .strtab + Type: SHT_STRTAB +DynamicSymbols: + - Name: BAR + +## Check we report a warning when a version definition is not correctly aligned in memory. + +# RUN: yaml2obj %s --docnum=8 -o %t8 +# RUN: llvm-readobj -V %t8 2>&1 | FileCheck %s --check-prefix=MISALIGNED-DEF -DFILE=%t8 +# RUN: llvm-readelf -V %t8 2>&1 | FileCheck %s --check-prefix=MISALIGNED-DEF -DFILE=%t8 + +# MISALIGNED-DEF: warning: '[[FILE]]': invalid SHT_GNU_verdef section with index 1: found a misaligned version definition entry at offset 0x0 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Type: Fill + Size: 0x1 + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Link: .dynstr + Info: 0x1 + Entries: + - Version: 0 + Flags: 0 + VersionNdx: 0 + Hash: 0 + Names: + - FOO +DynamicSymbols: + - Name: foo + +## Check we report a warning when an auxiliary entry is not correctly aligned in memory. + +# RUN: yaml2obj %s --docnum=9 -o %t9 +# RUN: llvm-readobj -V %t9 2>&1 | FileCheck %s --check-prefix=MISALIGNED-AUX -DFILE=%t9 +# RUN: llvm-readelf -V %t9 2>&1 | FileCheck %s --check-prefix=MISALIGNED-AUX -DFILE=%t9 + +# MISALIGNED-AUX: warning: '[[FILE]]': invalid SHT_GNU_verdef section with index 1: found a misaligned auxiliary entry at offset 0x13 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Flags: [ SHF_ALLOC ] + Link: .dynstr + Info: 0x1 +## The byte offset to the auxiliary entry is 0x13, i.e. it is not correctly aligned in memory. 
+ Content: "0000000000000100000000001300000000000000" +DynamicSymbols: + - Name: foo + Binding: STB_GLOBAL diff --git a/llvm/test/tools/yaml2obj/ELF/verdef-section.yaml b/llvm/test/tools/yaml2obj/ELF/verdef-section.yaml index 9493ab460b70b..960acbdb74043 100644 --- a/llvm/test/tools/yaml2obj/ELF/verdef-section.yaml +++ b/llvm/test/tools/yaml2obj/ELF/verdef-section.yaml @@ -31,7 +31,7 @@ # CHECK-NEXT: Index: 3 # CHECK-NEXT: Hash: 108387922 # CHECK-NEXT: Name: VERSION_2 -# CHECK-NEXT: Predecessor: VERSION_3 +# CHECK-NEXT: Predecessors: [VERSION_3] # CHECK-NEXT: } # CHECK-NEXT: ] diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index f6975bdb45e8f..5127939f0180a 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -151,6 +151,24 @@ struct DynRegionInfo { } }; +namespace { +struct VerdAux { + unsigned Offset; + std::string Name; +}; + +struct VerDef { + unsigned Offset; + unsigned Version; + unsigned Flags; + unsigned Ndx; + unsigned Cnt; + unsigned Hash; + std::string Name; + std::vector AuxV; +}; +} // namespace + template class ELFDumper : public ObjDumper { public: ELFDumper(const object::ELFObjectFile *ObjF, ScopedPrinter &Writer); @@ -324,8 +342,106 @@ template class ELFDumper : public ObjDumper { const DynRegionInfo &getDynamicTableRegion() const { return DynamicTable; } const Elf_Hash *getHashTable() const { return HashTable; } const Elf_GnuHash *getGnuHashTable() const { return GnuHashTable; } + + Expected> + getVersionDefinitions(const Elf_Shdr *Sec) const; }; +template +Expected> +ELFDumper::getVersionDefinitions(const Elf_Shdr *Sec) const { + const ELFFile *Obj = ObjF->getELFFile(); + unsigned SecNdx = Sec - &cantFail(Obj->sections()).front(); + + Expected StrTabSecOrErr = Obj->getSection(Sec->sh_link); + if (!StrTabSecOrErr) + return createError( + "invalid section linked to SHT_GNU_verdef section with index " + + Twine(SecNdx) + ": " + 
toString(StrTabSecOrErr.takeError())); + + Expected StrTabOrErr = Obj->getStringTable(*StrTabSecOrErr); + if (!StrTabOrErr) + return createError( + "invalid string table linked to SHT_GNU_verdef section with index " + + Twine(SecNdx) + ": " + toString(StrTabOrErr.takeError())); + + Expected> ContentsOrErr = Obj->getSectionContents(Sec); + if (!ContentsOrErr) + return createError( + "cannot read content of SHT_GNU_verdef section with index " + + Twine(SecNdx) + ": " + toString(ContentsOrErr.takeError())); + + const uint8_t *Start = ContentsOrErr->data(); + const uint8_t *End = Start + ContentsOrErr->size(); + + auto ExtractNextAux = [&](const uint8_t *&VerdauxBuf, + unsigned VerDefNdx) -> Expected { + if (VerdauxBuf + sizeof(Elf_Verdaux) > End) + return createError("invalid SHT_GNU_verdef section with index " + + Twine(SecNdx) + ": version definition " + + Twine(VerDefNdx) + + " refers to an auxiliary entry that goes past the end " + "of the section"); + + auto *Verdaux = reinterpret_cast(VerdauxBuf); + VerdauxBuf += Verdaux->vda_next; + + VerdAux Aux; + Aux.Offset = VerdauxBuf - Start; + if (Verdaux->vda_name <= StrTabOrErr->size()) + Aux.Name = StrTabOrErr->drop_front(Verdaux->vda_name); + else + Aux.Name = "vda_name) + ">"; + return Aux; + }; + + std::vector Ret; + const uint8_t *VerdefBuf = Start; + for (unsigned I = 1; I <= /*VerDefsNum=*/Sec->sh_info; ++I) { + if (VerdefBuf + sizeof(Elf_Verdef) > End) + return createError("invalid SHT_GNU_verdef section with index " + + Twine(SecNdx) + ": version definition " + Twine(I) + + " goes past the end of the section"); + + if (uintptr_t(VerdefBuf) % sizeof(uint32_t) != 0) + return createError( + "invalid SHT_GNU_verdef section with index " + Twine(SecNdx) + + ": found a misaligned version definition entry at offset 0x" + + Twine::utohexstr(VerdefBuf - Start)); + + const Elf_Verdef *D = reinterpret_cast(VerdefBuf); + VerDef &VD = *Ret.emplace(Ret.end()); + VD.Offset = VerdefBuf - Start; + VD.Version = D->vd_version; + 
VD.Flags = D->vd_flags; + VD.Ndx = D->vd_ndx; + VD.Cnt = D->vd_cnt; + VD.Hash = D->vd_hash; + + const uint8_t *VerdauxBuf = VerdefBuf + D->vd_aux; + for (unsigned J = 0; J < D->vd_cnt; ++J) { + if (uintptr_t(VerdauxBuf) % sizeof(uint32_t) != 0) + return createError("invalid SHT_GNU_verdef section with index " + + Twine(SecNdx) + + ": found a misaligned auxiliary entry at offset 0x" + + Twine::utohexstr(VerdauxBuf - Start)); + + Expected AuxOrErr = ExtractNextAux(VerdauxBuf, I); + if (!AuxOrErr) + return AuxOrErr.takeError(); + + if (J == 0) + VD.Name = AuxOrErr->Name; + else + VD.AuxV.push_back(*AuxOrErr); + } + + VerdefBuf += D->vd_next; + } + + return Ret; +} + template void ELFDumper::printSymbolsHelper(bool IsDynamic) const { StringRef StrTable, SymtabName; @@ -3901,42 +4017,26 @@ void GNUStyle::printVersionDefinitionSection(const ELFFile *Obj, if (!Sec) return; - unsigned VerDefsNum = Sec->sh_info; - printGNUVersionSectionProlog(OS, "Version definition", VerDefsNum, Obj, Sec, + printGNUVersionSectionProlog(OS, "Version definition", Sec->sh_info, Obj, Sec, this->FileName); - const Elf_Shdr *StrTabSec = - unwrapOrError(this->FileName, Obj->getSection(Sec->sh_link)); - StringRef StringTable( - reinterpret_cast(Obj->base() + StrTabSec->sh_offset), - (size_t)StrTabSec->sh_size); - - const uint8_t *VerdefBuf = - unwrapOrError(this->FileName, Obj->getSectionContents(Sec)).data(); - const uint8_t *Begin = VerdefBuf; - - while (VerDefsNum--) { - const Elf_Verdef *Verdef = reinterpret_cast(VerdefBuf); - OS << format(" 0x%04x: Rev: %u Flags: %s Index: %u Cnt: %u", - VerdefBuf - Begin, (unsigned)Verdef->vd_version, - versionFlagToString(Verdef->vd_flags).c_str(), - (unsigned)Verdef->vd_ndx, (unsigned)Verdef->vd_cnt); - - const uint8_t *VerdauxBuf = VerdefBuf + Verdef->vd_aux; - const Elf_Verdaux *Verdaux = - reinterpret_cast(VerdauxBuf); - OS << format(" Name: %s\n", - StringTable.drop_front(Verdaux->vda_name).data()); - - for (unsigned I = 1; I < Verdef->vd_cnt; ++I) { - 
VerdauxBuf += Verdaux->vda_next; - Verdaux = reinterpret_cast(VerdauxBuf); - OS << format(" 0x%04x: Parent %u: %s\n", VerdauxBuf - Begin, I, - StringTable.drop_front(Verdaux->vda_name).data()); - } + Expected> V = this->dumper()->getVersionDefinitions(Sec); + if (!V) { + this->reportUniqueWarning(V.takeError()); + return; + } - VerdefBuf += Verdef->vd_next; + for (const VerDef &Def : *V) { + OS << format(" 0x%04x: Rev: %u Flags: %s Index: %u Cnt: %u Name: %s\n", + Def.Offset, Def.Version, + versionFlagToString(Def.Flags).c_str(), Def.Ndx, Def.Cnt, + Def.Name.data()); + unsigned I = 0; + for (const VerdAux &Aux : Def.AuxV) + OS << format(" 0x%04x: Parent %u: %s\n", Aux.Offset, ++I, + Aux.Name.data()); } + OS << '\n'; } @@ -5713,44 +5813,25 @@ void LLVMStyle::printVersionDefinitionSection(const ELFFile *Obj, if (!Sec) return; - const uint8_t *SecStartAddress = - reinterpret_cast(Obj->base() + Sec->sh_offset); - const uint8_t *SecEndAddress = SecStartAddress + Sec->sh_size; - const uint8_t *VerdefBuf = SecStartAddress; - const Elf_Shdr *StrTab = - unwrapOrError(this->FileName, Obj->getSection(Sec->sh_link)); - - unsigned VerDefsNum = Sec->sh_info; - while (VerDefsNum--) { - if (VerdefBuf + sizeof(Elf_Verdef) > SecEndAddress) - // FIXME: report_fatal_error is not a good way to report error. We should - // emit a parsing error here and below. 
- report_fatal_error("invalid offset in the section"); + Expected> V = this->dumper()->getVersionDefinitions(Sec); + if (!V) { + this->reportUniqueWarning(V.takeError()); + return; + } - const Elf_Verdef *Verdef = reinterpret_cast(VerdefBuf); + for (const VerDef &D : *V) { DictScope Def(W, "Definition"); - W.printNumber("Version", Verdef->vd_version); - W.printFlags("Flags", Verdef->vd_flags, makeArrayRef(SymVersionFlags)); - W.printNumber("Index", Verdef->vd_ndx); - W.printNumber("Hash", Verdef->vd_hash); - W.printString("Name", StringRef(reinterpret_cast( - Obj->base() + StrTab->sh_offset + - Verdef->getAux()->vda_name))); - if (!Verdef->vd_cnt) - report_fatal_error("at least one definition string must exist"); - if (Verdef->vd_cnt > 2) - report_fatal_error("more than one predecessor is not expected"); - - if (Verdef->vd_cnt == 2) { - const uint8_t *VerdauxBuf = - VerdefBuf + Verdef->vd_aux + Verdef->getAux()->vda_next; - const Elf_Verdaux *Verdaux = - reinterpret_cast(VerdauxBuf); - W.printString("Predecessor", - StringRef(reinterpret_cast( - Obj->base() + StrTab->sh_offset + Verdaux->vda_name))); - } - VerdefBuf += Verdef->vd_next; + W.printNumber("Version", D.Version); + W.printFlags("Flags", D.Flags, makeArrayRef(SymVersionFlags)); + W.printNumber("Index", D.Ndx); + W.printNumber("Hash", D.Hash); + W.printString("Name", D.Name.c_str()); + + if (D.AuxV.empty()) + continue; + W.printList( + "Predecessors", D.AuxV, + [](raw_ostream &OS, const VerdAux &Aux) { OS << Aux.Name.c_str(); }); } } From d7be3eab5c0e1598e919973ed68a200997a4734a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Marques?= Date: Tue, 26 Nov 2019 14:24:59 +0000 Subject: [PATCH 048/591] [RISCV] Handle fcopysign(f32, f64) and fcopysign(f64, f32) Summary: Adds tablegen patterns to explicitly handle fcopysign where the magnitude and sign arguments have different types, due to the sign value casts being removed the by DAGCombiner. Support for RV32IF follows in a separate commit. 
Adds tests for all relevant scenarios except RV32IF. Reviewers: lenary Reviewed By: lenary Tags: #llvm Differential Revision: https://reviews.llvm.org/D70678 --- llvm/lib/Target/RISCV/RISCVInstrInfoD.td | 3 + llvm/test/CodeGen/RISCV/copysign-casts.ll | 92 +++++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/copysign-casts.ll diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index fe38c4ff02d33..b5343e8a83098 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -231,6 +231,9 @@ def : Pat<(fabs FPR64:$rs1), (FSGNJX_D $rs1, $rs1)>; def : PatFpr64Fpr64; def : Pat<(fcopysign FPR64:$rs1, (fneg FPR64:$rs2)), (FSGNJN_D $rs1, $rs2)>; +def : Pat<(fcopysign FPR64:$rs1, FPR32:$rs2), (FSGNJ_D $rs1, (FCVT_D_S $rs2))>; +def : Pat<(fcopysign FPR32:$rs1, FPR64:$rs2), (FSGNJ_S $rs1, (FCVT_S_D $rs2, + 0b111))>; // fmadd: rs1 * rs2 + rs3 def : Pat<(fma FPR64:$rs1, FPR64:$rs2, FPR64:$rs3), diff --git a/llvm/test/CodeGen/RISCV/copysign-casts.ll b/llvm/test/CodeGen/RISCV/copysign-casts.ll new file mode 100644 index 0000000000000..45faa2306149d --- /dev/null +++ b/llvm/test/CodeGen/RISCV/copysign-casts.ll @@ -0,0 +1,92 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+f -mattr=+d \ +; RUN: -target-abi ilp32d < %s | FileCheck %s -check-prefix=RV32IFD +; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+f -mattr=+d \ +; RUN: -target-abi lp64d < %s | FileCheck %s -check-prefix=RV64IFD + +; Test fcopysign scenarios where the sign argument is casted to the type of the +; magnitude argument. Those casts can be folded away by the DAGCombiner. 
+ +declare double @llvm.copysign.f64(double, double) +declare float @llvm.copysign.f32(float, float) + +define double @fold_promote(double %a, float %b) nounwind { +; RV32I-LABEL: fold_promote: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a3, 524288 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: addi a3, a3, -1 +; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fold_promote: +; RV64I: # %bb.0: +; RV64I-NEXT: addi a2, zero, -1 +; RV64I-NEXT: slli a2, a2, 63 +; RV64I-NEXT: addi a2, a2, -1 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: addi a2, zero, 1 +; RV64I-NEXT: slli a2, a2, 31 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32IFD-LABEL: fold_promote: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: fcvt.d.s ft0, fa1 +; RV32IFD-NEXT: fsgnj.d fa0, fa0, ft0 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fold_promote: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.s ft0, fa1 +; RV64IFD-NEXT: fsgnj.d fa0, fa0, ft0 +; RV64IFD-NEXT: ret + %c = fpext float %b to double + %t = call double @llvm.copysign.f64(double %a, double %c) + ret double %t +} + +define float @fold_demote(float %a, double %b) nounwind { +; RV32I-LABEL: fold_demote: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: and a2, a2, a1 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fold_demote: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a2, 524288 +; RV64I-NEXT: addiw a2, a2, -1 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: addi a2, zero, -1 +; RV64I-NEXT: slli a2, a2, 63 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32IFD-LABEL: fold_demote: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: fcvt.s.d ft0, fa1 +; RV32IFD-NEXT: fsgnj.s fa0, fa0, ft0 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fold_demote: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.s.d ft0, fa1 +; RV64IFD-NEXT: 
fsgnj.s fa0, fa0, ft0 +; RV64IFD-NEXT: ret + %c = fptrunc double %b to float + %t = call float @llvm.copysign.f32(float %a, float %c) + ret float %t +} From 6fd4c42fa815952b29bee573068d60d13f7c9f37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Marques?= Date: Sun, 24 Nov 2019 15:23:29 +0000 Subject: [PATCH 049/591] [LegalizeTypes][RISCV] Soften FCOPYSIGN operand Summary: Adds support for softening FCOPYSIGN operands. Adds RISC-V tests that exercise the new softening code. Reviewers: asb, lenary, efriedma Reviewed By: efriedma Tags: #llvm Differential Revision: https://reviews.llvm.org/D70679 --- .../SelectionDAG/LegalizeFloatTypes.cpp | 35 +++++++++++++++++++ llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + llvm/test/CodeGen/RISCV/copysign-casts.ll | 18 ++++++++++ 3 files changed, 54 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index c4a74d5c1c714..8dbff7d273571 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -854,6 +854,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; + case ISD::FCOPYSIGN: Res = SoftenFloatOp_FCOPYSIGN(N); break; } // If the result is null, the sub-method took care of registering results etc. 
@@ -1036,6 +1037,40 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { ST->getMemOperand()); } +SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = BitConvertToInteger(N->getOperand(1)); + SDLoc dl(N); + + EVT LVT = LHS.getValueType(); + EVT ILVT = EVT::getIntegerVT(*DAG.getContext(), LVT.getSizeInBits()); + EVT RVT = RHS.getValueType(); + + unsigned LSize = LVT.getSizeInBits(); + unsigned RSize = RVT.getSizeInBits(); + + // Shift right or sign-extend it if the two operands have different types. + int SizeDiff = RSize - LSize; + if (SizeDiff > 0) { + RHS = + DAG.getNode(ISD::SRL, dl, RVT, RHS, + DAG.getConstant(SizeDiff, dl, + TLI.getShiftAmountTy(RHS.getValueType(), + DAG.getDataLayout()))); + RHS = DAG.getNode(ISD::TRUNCATE, dl, ILVT, RHS); + } else if (SizeDiff < 0) { + RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, RHS); + RHS = + DAG.getNode(ISD::SHL, dl, ILVT, RHS, + DAG.getConstant(-SizeDiff, dl, + TLI.getShiftAmountTy(RHS.getValueType(), + DAG.getDataLayout()))); + } + + RHS = DAG.getBitcast(LVT, RHS); + return DAG.getNode(ISD::FCOPYSIGN, dl, LVT, LHS, RHS); +} + SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index c944bda3700bf..89410ccd857b0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -540,6 +540,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftenFloatOp_SELECT_CC(SDNode *N); SDValue SoftenFloatOp_SETCC(SDNode *N); SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); + SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N); //===--------------------------------------------------------------------===// // Float Expansion Support: LegalizeFloatTypes.cpp diff --git a/llvm/test/CodeGen/RISCV/copysign-casts.ll 
b/llvm/test/CodeGen/RISCV/copysign-casts.ll index 45faa2306149d..acd64c203657a 100644 --- a/llvm/test/CodeGen/RISCV/copysign-casts.ll +++ b/llvm/test/CodeGen/RISCV/copysign-casts.ll @@ -3,6 +3,8 @@ ; RUN: | FileCheck %s -check-prefix=RV32I ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+f \ +; RUN: -target-abi ilp32f < %s | FileCheck %s -check-prefix=RV32IF ; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+f -mattr=+d \ ; RUN: -target-abi ilp32d < %s | FileCheck %s -check-prefix=RV32IFD ; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+f -mattr=+d \ @@ -37,6 +39,16 @@ define double @fold_promote(double %a, float %b) nounwind { ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; +; RV32IF-LABEL: fold_promote: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmv.x.w a2, fa0 +; RV32IF-NEXT: lui a3, 524288 +; RV32IF-NEXT: and a2, a2, a3 +; RV32IF-NEXT: addi a3, a3, -1 +; RV32IF-NEXT: and a1, a1, a3 +; RV32IF-NEXT: or a1, a1, a2 +; RV32IF-NEXT: ret +; ; RV32IFD-LABEL: fold_promote: ; RV32IFD: # %bb.0: ; RV32IFD-NEXT: fcvt.d.s ft0, fa1 @@ -75,6 +87,12 @@ define float @fold_demote(float %a, double %b) nounwind { ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; +; RV32IF-LABEL: fold_demote: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmv.w.x ft0, a1 +; RV32IF-NEXT: fsgnj.s fa0, fa0, ft0 +; RV32IF-NEXT: ret +; ; RV32IFD-LABEL: fold_demote: ; RV32IFD: # %bb.0: ; RV32IFD-NEXT: fcvt.s.d ft0, fa1 From aa0e92e1f7069834852c08fdd32a92258e30555c Mon Sep 17 00:00:00 2001 From: Alexander Kornienko Date: Fri, 22 Nov 2019 12:22:40 +0100 Subject: [PATCH 050/591] [clang-tidy] Use range-for for check registration. NFC Actually, just testing GitHub commit rights. 
--- clang-tools-extra/clang-tidy/ClangTidy.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/clang-tools-extra/clang-tidy/ClangTidy.cpp b/clang-tools-extra/clang-tidy/ClangTidy.cpp index 91e8ebee13686..40aaf402ec0e1 100644 --- a/clang-tools-extra/clang-tidy/ClangTidy.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidy.cpp @@ -314,10 +314,8 @@ ClangTidyASTConsumerFactory::ClangTidyASTConsumerFactory( IntrusiveRefCntPtr OverlayFS) : Context(Context), OverlayFS(OverlayFS), CheckFactories(new ClangTidyCheckFactories) { - for (ClangTidyModuleRegistry::iterator I = ClangTidyModuleRegistry::begin(), - E = ClangTidyModuleRegistry::end(); - I != E; ++I) { - std::unique_ptr Module(I->instantiate()); + for (ClangTidyModuleRegistry::entry E : ClangTidyModuleRegistry::entries()) { + std::unique_ptr Module = E.instantiate(); Module->addCheckFactories(*CheckFactories); } } From 3cd9a8b7dc61e3ca5829225548d6346b8a5a25bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Tue, 26 Nov 2019 16:45:43 +0100 Subject: [PATCH 051/591] [lldb] [test] Un-XFAIL lldb-server tests fixed on NetBSD --- .../lldbsuite/test/tools/lldb-server/TestLldbGdbServer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestLldbGdbServer.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestLldbGdbServer.py index 7a39079b472a8..2b7f28a3aefbc 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestLldbGdbServer.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-server/TestLldbGdbServer.py @@ -675,7 +675,6 @@ def test_Hg_switches_to_3_threads_launch_debugserver(self): self.Hg_switches_to_3_threads() @expectedFailureAll(oslist=["windows"]) # expect 4 threads - @expectedFailureNetBSD @llgs_test def test_Hg_switches_to_3_threads_launch_llgs(self): self.init_llgs_test() @@ -1583,7 +1582,6 @@ def test_P_and_p_thread_suffix_work_debugserver(self): self.P_and_p_thread_suffix_work() 
@skipIfWindows - @expectedFailureNetBSD @llgs_test def test_P_and_p_thread_suffix_work_llgs(self): self.init_llgs_test() From a913e872d6e7044ae77e55c45ab3ea5304eb7262 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 25 Nov 2019 16:25:27 -0500 Subject: [PATCH 052/591] [OPENMP]Fix PR44133: crash on lambda reductions in templates. Need to perform the instantiation of the combiner/initializer even if the resulting type is not dependent, if the construct is defined in templates in some cases. --- .../lib/Sema/SemaTemplateInstantiateDecl.cpp | 83 ++++++++++++------- ...declare_reduction_codegen_in_templates.cpp | 43 ++++++++++ 2 files changed, 94 insertions(+), 32 deletions(-) create mode 100644 clang/test/OpenMP/declare_reduction_codegen_in_templates.cpp diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 63777d5272b78..a2fd8a92dd61e 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -3067,6 +3067,17 @@ Decl *TemplateDeclInstantiator::VisitOMPDeclareReductionDecl( } else { SubstReductionType = D->getType(); } + Expr *Combiner = D->getCombiner(); + Expr *Init = D->getInitializer(); + const bool CombinerRequiresInstantiation = + Combiner && + (Combiner->isValueDependent() || Combiner->isInstantiationDependent() || + Combiner->isTypeDependent() || + Combiner->containsUnexpandedParameterPack()); + const bool InitRequiresInstantiation = + Init && + (Init->isValueDependent() || Init->isInstantiationDependent() || + Init->isTypeDependent() || Init->containsUnexpandedParameterPack()); if (SubstReductionType.isNull()) return nullptr; bool IsCorrect = !SubstReductionType.isNull(); @@ -3084,11 +3095,12 @@ Decl *TemplateDeclInstantiator::VisitOMPDeclareReductionDecl( PrevDeclInScope); auto *NewDRD = cast(DRD.get().getSingleDecl()); SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, NewDRD); - if (!RequiresInstantiation) { - if (Expr *Combiner = 
D->getCombiner()) { + if (!RequiresInstantiation && !CombinerRequiresInstantiation && + !InitRequiresInstantiation) { + if (Combiner) { NewDRD->setCombinerData(D->getCombinerIn(), D->getCombinerOut()); NewDRD->setCombiner(Combiner); - if (Expr *Init = D->getInitializer()) { + if (Init) { NewDRD->setInitializerData(D->getInitOrig(), D->getInitPriv()); NewDRD->setInitializer(Init, D->getInitializerKind()); } @@ -3100,22 +3112,32 @@ Decl *TemplateDeclInstantiator::VisitOMPDeclareReductionDecl( Expr *SubstCombiner = nullptr; Expr *SubstInitializer = nullptr; // Combiners instantiation sequence. - if (D->getCombiner()) { - SemaRef.ActOnOpenMPDeclareReductionCombinerStart( - /*S=*/nullptr, NewDRD); - SemaRef.CurrentInstantiationScope->InstantiatedLocal( - cast(D->getCombinerIn())->getDecl(), - cast(NewDRD->getCombinerIn())->getDecl()); - SemaRef.CurrentInstantiationScope->InstantiatedLocal( - cast(D->getCombinerOut())->getDecl(), - cast(NewDRD->getCombinerOut())->getDecl()); - auto *ThisContext = dyn_cast_or_null(Owner); - Sema::CXXThisScopeRAII ThisScope(SemaRef, ThisContext, Qualifiers(), - ThisContext); - SubstCombiner = SemaRef.SubstExpr(D->getCombiner(), TemplateArgs).get(); - SemaRef.ActOnOpenMPDeclareReductionCombinerEnd(NewDRD, SubstCombiner); - // Initializers instantiation sequence. 
- if (D->getInitializer()) { + if (Combiner) { + if (!CombinerRequiresInstantiation) { + NewDRD->setCombinerData(D->getCombinerIn(), D->getCombinerOut()); + NewDRD->setCombiner(Combiner); + } else { + SemaRef.ActOnOpenMPDeclareReductionCombinerStart( + /*S=*/nullptr, NewDRD); + SemaRef.CurrentInstantiationScope->InstantiatedLocal( + cast(D->getCombinerIn())->getDecl(), + cast(NewDRD->getCombinerIn())->getDecl()); + SemaRef.CurrentInstantiationScope->InstantiatedLocal( + cast(D->getCombinerOut())->getDecl(), + cast(NewDRD->getCombinerOut())->getDecl()); + auto *ThisContext = dyn_cast_or_null(Owner); + Sema::CXXThisScopeRAII ThisScope(SemaRef, ThisContext, Qualifiers(), + ThisContext); + SubstCombiner = SemaRef.SubstExpr(Combiner, TemplateArgs).get(); + SemaRef.ActOnOpenMPDeclareReductionCombinerEnd(NewDRD, SubstCombiner); + } + } + // Initializers instantiation sequence. + if (Init) { + if (!InitRequiresInstantiation) { + NewDRD->setInitializerData(D->getInitOrig(), D->getInitPriv()); + NewDRD->setInitializer(Init, D->getInitializerKind()); + } else { VarDecl *OmpPrivParm = SemaRef.ActOnOpenMPDeclareReductionInitializerStart( /*S=*/nullptr, NewDRD); @@ -3126,8 +3148,7 @@ Decl *TemplateDeclInstantiator::VisitOMPDeclareReductionDecl( cast(D->getInitPriv())->getDecl(), cast(NewDRD->getInitPriv())->getDecl()); if (D->getInitializerKind() == OMPDeclareReductionDecl::CallInit) { - SubstInitializer = - SemaRef.SubstExpr(D->getInitializer(), TemplateArgs).get(); + SubstInitializer = SemaRef.SubstExpr(Init, TemplateArgs).get(); } else { auto *OldPrivParm = cast(cast(D->getInitPriv())->getDecl()); @@ -3139,19 +3160,17 @@ Decl *TemplateDeclInstantiator::VisitOMPDeclareReductionDecl( SemaRef.ActOnOpenMPDeclareReductionInitializerEnd( NewDRD, SubstInitializer, OmpPrivParm); } - IsCorrect = - IsCorrect && SubstCombiner && - (!D->getInitializer() || - (D->getInitializerKind() == OMPDeclareReductionDecl::CallInit && - SubstInitializer) || - (D->getInitializerKind() != 
OMPDeclareReductionDecl::CallInit && - !SubstInitializer && !SubstInitializer)); - } else { - IsCorrect = false; } + IsCorrect = IsCorrect && (!CombinerRequiresInstantiation || SubstCombiner) && + (!InitRequiresInstantiation || + (!Init || + (D->getInitializerKind() == OMPDeclareReductionDecl::CallInit && + SubstInitializer) || + (D->getInitializerKind() != OMPDeclareReductionDecl::CallInit && + !SubstInitializer))); - (void)SemaRef.ActOnOpenMPDeclareReductionDirectiveEnd(/*S=*/nullptr, DRD, - IsCorrect); + (void)SemaRef.ActOnOpenMPDeclareReductionDirectiveEnd( + /*S=*/nullptr, DRD, IsCorrect && !D->isInvalidDecl()); return NewDRD; } diff --git a/clang/test/OpenMP/declare_reduction_codegen_in_templates.cpp b/clang/test/OpenMP/declare_reduction_codegen_in_templates.cpp new file mode 100644 index 0000000000000..0409c02191445 --- /dev/null +++ b/clang/test/OpenMP/declare_reduction_codegen_in_templates.cpp @@ -0,0 +1,43 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++17 -emit-llvm %s -triple x86_64-linux -fexceptions -fcxx-exceptions -o - -femit-all-decls -disable-llvm-passes | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++17 -triple x86_64-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s -femit-all-decls -disable-llvm-passes +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-linux -fexceptions -fcxx-exceptions -std=c++17 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls -disable-llvm-passes | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++17 -emit-llvm %s -triple x86_64-linux -fexceptions -fcxx-exceptions -o - -femit-all-decls -disable-llvm-passes | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++17 -triple x86_64-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s -femit-all-decls -disable-llvm-passes +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-linux -fexceptions -fcxx-exceptions -std=c++17 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls 
-disable-llvm-passes | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics + +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [[STD_D:%.+]]*)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), [[STD_D]]* %{{.+}}) + +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, [[STD_D]]* {{.+}}) +// CHECK: call i32 @__kmpc_reduce_nowait(%struct.ident_t* + +#ifndef HEADER +#define HEADER + +typedef long unsigned a; +namespace std { +template class initializer_list { + const int *b; + a c; +}; +template class d {}; +template class f { +public: + f(initializer_list); +}; +} // namespace std +template void foo(g, h) { + std::d i; +#pragma omp declare reduction(j : std::d : []{}()) +#pragma omp parallel reduction(j : i) + ; +} +void k() { + std::f l{}; + std::f m{2}; + foo(l, m); +} + +#endif // HEADER From 008e65a7bfb320bf197a04ff6427da84f8d38b76 Mon Sep 17 00:00:00 2001 From: vpykhtin Date: Mon, 18 Nov 2019 20:06:48 +0300 Subject: [PATCH 053/591] [AMDGPU] Fix emitIfBreak CF lowering: use temp reg to make register coalescer life easier. 
Differential revision: https://reviews.llvm.org/D70405 --- llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 7 +++-- .../divergent-branch-uniform-condition.ll | 26 ++++++++--------- llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll | 5 ++-- llvm/test/CodeGen/AMDGPU/loop_break.ll | 7 ++--- llvm/test/CodeGen/AMDGPU/multilevel-break.ll | 28 +++++++++---------- llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll | 3 +- llvm/test/CodeGen/AMDGPU/valu-i1.ll | 5 ++-- llvm/test/CodeGen/AMDGPU/wave32.ll | 14 ++++------ 8 files changed, 45 insertions(+), 50 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 6f9abd3a8d9b9..bf052dc3c9304 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -372,12 +372,15 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) { // exit" mask. MachineInstr *And = nullptr, *Or = nullptr; if (!SkipAnding) { - And = BuildMI(MBB, &MI, DL, TII->get(AndOpc), Dst) + Register AndReg = MRI->createVirtualRegister(BoolRC); + And = BuildMI(MBB, &MI, DL, TII->get(AndOpc), AndReg) .addReg(Exec) .add(MI.getOperand(1)); Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst) - .addReg(Dst) + .addReg(AndReg) .add(MI.getOperand(2)); + if (LIS) + LIS->createAndComputeVirtRegInterval(AndReg); } else Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst) .add(MI.getOperand(1)) diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll index 0dec67ad340cd..895539c00bce9 100644 --- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll +++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll @@ -16,29 +16,28 @@ define amdgpu_ps void @main(i32, float) { ; CHECK-NEXT: s_mov_b32 s0, 0 ; CHECK-NEXT: v_interp_p1_f32_e32 v0, v1, attr0.x ; CHECK-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 -; CHECK-NEXT: s_mov_b64 s[4:5], 0 +; CHECK-NEXT: s_mov_b64 s[2:3], 0 ; CHECK-NEXT: ; 
implicit-def: $sgpr6_sgpr7 -; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3 +; CHECK-NEXT: ; implicit-def: $sgpr4_sgpr5 ; CHECK-NEXT: s_branch BB0_3 ; CHECK-NEXT: BB0_1: ; %Flow1 ; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 ; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] -; CHECK-NEXT: s_mov_b64 s[10:11], 0 +; CHECK-NEXT: s_mov_b64 s[8:9], 0 ; CHECK-NEXT: BB0_2: ; %Flow ; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: s_and_b64 s[8:9], exec, s[6:7] -; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], s[4:5] -; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec -; CHECK-NEXT: s_and_b64 s[4:5], s[10:11], exec -; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] -; CHECK-NEXT: s_mov_b64 s[4:5], s[8:9] -; CHECK-NEXT: s_andn2_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_and_b64 s[10:11], exec, s[6:7] +; CHECK-NEXT: s_or_b64 s[2:3], s[10:11], s[2:3] +; CHECK-NEXT: s_andn2_b64 s[4:5], s[4:5], exec +; CHECK-NEXT: s_and_b64 s[8:9], s[8:9], exec +; CHECK-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] +; CHECK-NEXT: s_andn2_b64 exec, exec, s[2:3] ; CHECK-NEXT: s_cbranch_execz BB0_6 ; CHECK-NEXT: BB0_3: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec ; CHECK-NEXT: s_cmp_lt_u32 s0, 32 -; CHECK-NEXT: s_mov_b64 s[10:11], -1 +; CHECK-NEXT: s_mov_b64 s[8:9], -1 ; CHECK-NEXT: s_cbranch_scc0 BB0_2 ; CHECK-NEXT: ; %bb.4: ; %endif1 ; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 @@ -53,9 +52,9 @@ define amdgpu_ps void @main(i32, float) { ; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1 ; CHECK-NEXT: s_branch BB0_1 ; CHECK-NEXT: BB0_6: ; %Flow2 -; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_or_b64 exec, exec, s[2:3] ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[2:3] +; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[4:5] ; CHECK-NEXT: ; mask branch BB0_8 ; CHECK-NEXT: BB0_7: ; %if1 ; CHECK-NEXT: v_sqrt_f32_e32 v1, v0 @@ -63,6 +62,7 @@ define amdgpu_ps void @main(i32, float) { ; CHECK-NEXT: s_or_b64 exec, exec, s[0:1] ; CHECK-NEXT: exp mrt0 
v1, v1, v1, v1 done vm ; CHECK-NEXT: s_endpgm + ; this is the divergent branch with the condition not marked as divergent start: %v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0) diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll index fff1c22918ec6..51d1c091ab913 100644 --- a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll @@ -3,11 +3,10 @@ ; SI-LABEL: {{^}}i1_copy_from_loop: ; -; SI: [[LOOP:BB0_[0-9]+]]: ; %Flow1 -; SI: s_or_b64 exec, exec, [[EXIT_MASK:s\[[0-9]+:[0-9]+\]]] ; SI: ; %Flow +; SI: s_or_b64 [[EXIT_MASK:s\[[0-9]+:[0-9]+\]]] ; SI: s_and_b64 [[ACCUM_MASK:s\[[0-9]+:[0-9]+\]]], [[CC_MASK:s\[[0-9]+:[0-9]+\]]], exec -; SI: s_or_b64 [[I1_VALUE:s\[[0-9]+:[0-9]+\]]], s[6:7], [[ACCUM_MASK]] +; SI: s_or_b64 [[I1_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, [[ACCUM_MASK]] ; SI: s_cbranch_execz [[FOR_END_LABEL:BB0_[0-9]+]] ; SI: ; %for.body diff --git a/llvm/test/CodeGen/AMDGPU/loop_break.ll b/llvm/test/CodeGen/AMDGPU/loop_break.ll index 46c4b1e6b3a1c..684b183de690c 100644 --- a/llvm/test/CodeGen/AMDGPU/loop_break.ll +++ b/llvm/test/CodeGen/AMDGPU/loop_break.ll @@ -40,10 +40,9 @@ ; GCN: [[FLOW]]: ; %Flow ; GCN: ; in Loop: Header=BB0_1 Depth=1 -; GCN: s_and_b64 [[BROKEN_MASK]], exec, [[INNER_MASK]] -; GCN: s_or_b64 [[BROKEN_MASK]], [[BROKEN_MASK]], [[ACCUM_MASK]] -; GCN: s_mov_b64 [[ACCUM_MASK]], [[BROKEN_MASK]] -; GCN: s_andn2_b64 exec, exec, [[BROKEN_MASK]] +; GCN: s_and_b64 [[AND_MASK:s\[[0-9]+:[0-9]+\]]], exec, [[INNER_MASK]] +; GCN-NEXT: s_or_b64 [[ACCUM_MASK]], [[AND_MASK]], [[ACCUM_MASK]] +; GCN-NEXT: s_andn2_b64 exec, exec, [[ACCUM_MASK]] ; GCN-NEXT: s_cbranch_execnz [[LOOP_ENTRY]] ; GCN: ; %bb.4: ; %bb9 diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll index 08d8ec0fba4dc..5222ae56db87a 100644 --- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll +++ 
b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll @@ -25,22 +25,20 @@ ; GCN: s_mov_b64 [[LEFT_OUTER:s\[[0-9]+:[0-9]+\]]], 0{{$}} ; GCN: [[FLOW2:BB[0-9]+_[0-9]+]]: ; %Flow2 -; GCN: s_or_b64 exec, exec, [[TMP0:s\[[0-9]+:[0-9]+\]]] +; GCN: s_or_b64 exec, exec, [[LEFT_INNER:s\[[0-9]+:[0-9]+\]]] ; GCN: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_OUTER:s\[[0-9]+:[0-9]+\]]] -; GCN: s_or_b64 [[TMP1]], [[TMP1]], [[LEFT_OUTER]] -; GCN: s_mov_b64 [[LEFT_OUTER]], [[TMP1]] -; GCN: s_andn2_b64 exec, exec, [[TMP1]] +; GCN: s_or_b64 [[LEFT_OUTER:s\[[0-9]+:[0-9]+\]]], [[TMP1]], [[LEFT_OUTER]] +; GCN: s_andn2_b64 exec, exec, [[LEFT_OUTER]] ; GCN: s_cbranch_execz [[IF_BLOCK:BB[0-9]+_[0-9]+]] ; GCN: [[OUTER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP.outer{{$}} -; GCN: s_mov_b64 [[LEFT_INNER:s\[[0-9]+:[0-9]+\]]], 0{{$}} +; GCN: s_mov_b64 [[LEFT_INNER]], 0{{$}} ; GCN: ; %Flow ; GCN: s_or_b64 exec, exec, [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]] -; GCN: s_and_b64 [[TMP0]], exec, [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]] -; GCN: s_or_b64 [[TMP0]], [[TMP0]], [[LEFT_INNER]] -; GCN: s_mov_b64 [[LEFT_INNER]], [[TMP0]] -; GCN: s_andn2_b64 exec, exec, [[TMP0]] +; GCN: s_and_b64 [[TMP0:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]] +; GCN: s_or_b64 [[LEFT_INNER]], [[TMP0]], [[LEFT_INNER]] +; GCN: s_andn2_b64 exec, exec, [[LEFT_INNER]] ; GCN: s_cbranch_execz [[FLOW2]] ; GCN: [[INNER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP{{$}} @@ -82,17 +80,17 @@ ENDIF: ; preds = %LOOP ; OPT: llvm.amdgcn.end.cf ; GCN-LABEL: {{^}}multi_if_break_loop: -; GCN: s_mov_b64 [[BROKEN_THREADS_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}} +; GCN: s_mov_b64 [[SAVED_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}} ; GCN: [[LOOP:BB[0-9]+_[0-9]+]]: ; %Flow4 -; GCN: s_and_b64 [[BROKEN_THREADS_MASK]], exec, [[BROKEN_THREADS_MASK]] -; GCN: s_or_b64 [[BROKEN_THREADS_MASK]], [[BROKEN_THREADS_MASK]], [[SAVED:s\[[0-9]+:[0-9]+\]]] -; GCN: s_andn2_b64 exec, exec, [[BROKEN_THREADS_MASK]] +; GCN: s_and_b64 [[ANDTMP0:s\[[0-9]+:[0-9]+\]]], exec, {{s\[[0-9]+:[0-9]+\]}} +; GCN: 
s_or_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], [[ANDTMP0]], [[SAVED_MASK]] +; GCN: s_and_b64 [[BROKEN_THREADS_MASK:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, exec +; GCN: s_andn2_b64 exec, exec, [[MASK1]] ; GCN-NEXT: s_cbranch_execz [[LOOP_EXIT:BB[0-9]+_[0-9]+]] ; GCN: ; %bb1{{$}} ; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], -; GCN: s_mov_b64 [[SAVED]], [[BROKEN_THREADS_MASK]] ; GCN: ; %LeafBlock1 ; GCN: v_cmp_eq_u32_e32 vcc, 1, [[LOAD0]] @@ -122,7 +120,7 @@ ENDIF: ; preds = %LOOP ; GCN: s_branch [[LOOP]] ; GCN: [[LOOP_EXIT]]: ; %Flow6 -; GCN: s_or_b64 exec, exec, [[BROKEN_THREADS_MASK]] +; GCN: s_or_b64 exec, exec, [[SAVED_MASK]] define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 { bb: diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll index 14d78fbef29ea..23bb18e738f54 100644 --- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll +++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll @@ -37,9 +37,8 @@ ENDIF: ; SI: ; %endif ; SI: [[LOOP_LABEL:BB[0-9]+_[0-9]+]]: ; %loop -; SI: s_mov_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], [[LEFT]] ; SI: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[PHI]] -; SI: s_or_b64 [[LEFT]], [[TMP1]], [[TMP]] +; SI: s_or_b64 [[LEFT]], [[TMP1]], [[LEFT]] ; SI: s_andn2_b64 exec, exec, [[LEFT]] ; SI: s_cbranch_execnz [[LOOP_LABEL]] ; SI: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll index ef17825024eda..ea74268dbe7c2 100644 --- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll @@ -223,9 +223,8 @@ exit: ; SI-NEXT: ; in Loop: Header=[[LABEL_LOOP]] ; SI-NEXT: s_or_b64 exec, exec, [[ORNEG2]] ; SI-NEXT: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], -; SI-NEXT: s_or_b64 [[TMP2:s\[[0-9]+:[0-9]+\]]], [[TMP1]], [[COND_STATE]] -; SI-NEXT: s_mov_b64 [[COND_STATE]], [[TMP2]] -; SI-NEXT: s_andn2_b64 exec, exec, [[TMP2]] +; SI-NEXT: s_or_b64 [[COND_STATE]], [[TMP1]], [[COND_STATE]] +; SI-NEXT: s_andn2_b64 exec, exec, [[COND_STATE]] ; SI-NEXT: 
s_cbranch_execnz [[LABEL_LOOP]] ; SI: [[LABEL_EXIT]]: diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll index 91a993181979d..92808fec360f4 100644 --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -243,14 +243,12 @@ bb13: ; GFX1032: s_or_b32 [[MASK1]], [[MASK1]], [[MASK0]] ; GFX1064: s_or_b64 [[MASK1]], [[MASK1]], [[MASK0]] ; GCN: BB{{.*}}: ; %Flow -; GFX1032: s_and_b32 [[MASK0:s[0-9]+]], exec_lo, [[MASK1]] -; GFX1064: s_and_b64 [[MASK0:s\[[0-9:]+\]]], exec, [[MASK1]] -; GFX1032: s_or_b32 [[MASK0]], [[MASK0]], [[ACC:s[0-9]+]] -; GFX1064: s_or_b64 [[MASK0]], [[MASK0]], [[ACC:s\[[0-9:]+\]]] -; GFX1032: s_mov_b32 [[ACC]], [[MASK0]] -; GFX1064: s_mov_b64 [[ACC]], [[MASK0]] -; GFX1032: s_andn2_b32 exec_lo, exec_lo, [[MASK0]] -; GFX1064: s_andn2_b64 exec, exec, [[MASK0]] +; GFX1032: s_and_b32 [[TMP0:s[0-9]+]], exec_lo, [[MASK1]] +; GFX1064: s_and_b64 [[TMP0:s\[[0-9:]+\]]], exec, [[MASK1]] +; GFX1032: s_or_b32 [[ACC:s[0-9]+]], [[TMP0]], [[ACC]] +; GFX1064: s_or_b64 [[ACC:s\[[0-9:]+\]]], [[TMP0]], [[ACC]] +; GFX1032: s_andn2_b32 exec_lo, exec_lo, [[ACC]] +; GFX1064: s_andn2_b64 exec, exec, [[ACC]] ; GCN: s_cbranch_execz ; GCN: BB{{.*}}: ; GCN: s_load_dword [[LOAD:s[0-9]+]] From 7707d8aa9db8aa3814593f9c40cc707f306e3ae2 Mon Sep 17 00:00:00 2001 From: jasonliu Date: Tue, 26 Nov 2019 16:05:26 +0000 Subject: [PATCH 054/591] [XCOFF][AIX] Check linkage on the function, and two fixes for comments This is a follow up commit to address post-commit comment in D70443 Differential revision: https://reviews.llvm.org/D70443 --- llvm/lib/MC/XCOFFObjectWriter.cpp | 2 +- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 18 ++++++----- .../PowerPC/aix-weak-undef-func-call.ll | 30 +++++++++++++++++++ 3 files changed, 42 insertions(+), 8 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/aix-weak-undef-func-call.ll diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp index 
ca96a0ecf9ff5..ab0d9048ecf71 100644 --- a/llvm/lib/MC/XCOFFObjectWriter.cpp +++ b/llvm/lib/MC/XCOFFObjectWriter.cpp @@ -574,7 +574,7 @@ void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) { // yet, so start at index 0. uint32_t SymbolTableIndex = 0; - // Calculate undefined symbol's indices. + // Calculate indices for undefined symbols. for (auto &Csect : UndefinedCsects) { Csect.Size = 0; Csect.Address = 0; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index f95f8be8a0481..3c59cea7f96e2 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -52,6 +52,7 @@ #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallSite.h" @@ -5326,16 +5327,19 @@ SDValue PPCTargetLowering::FinishCall( GlobalAddressSDNode *G = cast(Callee); auto &Context = DAG.getMachineFunction().getMMI().getContext(); + const GlobalObject *GO = cast(G->getGlobal()); MCSymbolXCOFF *S = cast(Context.getOrCreateSymbol( - Twine(".") + Twine(G->getGlobal()->getName()))); - - const GlobalValue *GV = G->getGlobal(); - if (GV && GV->isDeclaration() && !S->hasContainingCsect()) { - // On AIX, undefined symbol need to associate with a MCSectionXCOFF to - // get the correct storage mapping class. In this case, XCOFF::XMC_PR. + Twine(".") + Twine(GO->getName()))); + + if (GO && GO->isDeclaration() && !S->hasContainingCsect()) { + // On AIX, an undefined symbol needs to be associated with a + // MCSectionXCOFF to get the correct storage mapping class. + // In this case, XCOFF::XMC_PR. 
+ const XCOFF::StorageClass SC = + TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO); MCSectionXCOFF *Sec = Context.getXCOFFSection(S->getName(), XCOFF::XMC_PR, XCOFF::XTY_ER, - XCOFF::C_EXT, SectionKind::getMetadata()); + SC, SectionKind::getMetadata()); S->setContainingCsect(Sec); } diff --git a/llvm/test/CodeGen/PowerPC/aix-weak-undef-func-call.ll b/llvm/test/CodeGen/PowerPC/aix-weak-undef-func-call.ll new file mode 100644 index 0000000000000..9fb3dec19edf2 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-weak-undef-func-call.ll @@ -0,0 +1,30 @@ +; RUN: llc -mtriple powerpc-ibm-aix-xcoff -filetype=obj -o %t.o < %s +; RUN: llvm-readobj --symbols %t.o | FileCheck %s + +define void @bar() { +entry: + call void bitcast (void (...)* @foo to void ()*)() + ret void +} + +declare extern_weak void @foo(...) + +;CHECK: Symbol { +;CHECK: Name: .foo +;CHECK-NEXT: Value (RelocatableAddress): 0x0 +;CHECK-NEXT: Section: N_UNDEF +;CHECK-NEXT: Type: 0x0 +;CHECK-NEXT: StorageClass: C_WEAKEXT (0x6F) +;CHECK-NEXT: NumberOfAuxEntries: 1 +;CHECK-NEXT: CSECT Auxiliary Entry { +;CHECK: SectionLen: 0 +;CHECK-NEXT: ParameterHashIndex: 0x0 +;CHECK-NEXT: TypeChkSectNum: 0x0 +;CHECK-NEXT: SymbolAlignmentLog2: 0 +;CHECK-NEXT: SymbolType: XTY_ER (0x0) +;CHECK-NEXT: StorageMappingClass: XMC_PR (0x0) +;CHECK-NEXT: StabInfoIndex: 0x0 +;CHECK-NEXT: StabSectNum: 0x0 +;CHECK-NEXT: } +;CHECK-NEXT: } + From 184d72a7c6a7f40e752a044eb0336cbd4c43d004 Mon Sep 17 00:00:00 2001 From: stozer Date: Fri, 22 Nov 2019 16:40:32 +0000 Subject: [PATCH 055/591] [DebugInfo] Disallow fragmenting DIExpressions with shift operators DIExpressions with shift operators should not be fragmented for the same reason as arithmetic operators: carry over cannot be expressed from one fragment to the other, so an invalid result would be produced. 
Differential Revision: https://reviews.llvm.org/D70601 --- llvm/lib/IR/DebugInfoMetadata.cpp | 8 ++++-- llvm/unittests/IR/MetadataTest.cpp | 43 ++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp index e4036ee1eb0c9..b0102275b36eb 100644 --- a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -1148,10 +1148,14 @@ Optional DIExpression::createFragmentExpression( for (auto Op : Expr->expr_ops()) { switch (Op.getOp()) { default: break; + case dwarf::DW_OP_shr: + case dwarf::DW_OP_shra: + case dwarf::DW_OP_shl: case dwarf::DW_OP_plus: + case dwarf::DW_OP_plus_uconst: case dwarf::DW_OP_minus: - // We can't safely split arithmetic into multiple fragments because we - // can't express carry-over between fragments. + // We can't safely split arithmetic or shift operations into multiple + // fragments because we can't express carry-over between fragments. // // FIXME: We *could* preserve the lowest fragment of a constant offset // operation if the offset fits into SizeInBits. diff --git a/llvm/unittests/IR/MetadataTest.cpp b/llvm/unittests/IR/MetadataTest.cpp index e6c7a50113957..99295681ec88b 100644 --- a/llvm/unittests/IR/MetadataTest.cpp +++ b/llvm/unittests/IR/MetadataTest.cpp @@ -2394,6 +2394,49 @@ TEST_F(DIExpressionTest, isValid) { #undef EXPECT_INVALID } +TEST_F(DIExpressionTest, createFragmentExpression) { +#define EXPECT_VALID_FRAGMENT(Offset, Size, ...) \ + do { \ + uint64_t Elements[] = {__VA_ARGS__}; \ + DIExpression* Expression = DIExpression::get(Context, Elements); \ + EXPECT_TRUE(DIExpression::createFragmentExpression( \ + Expression, Offset, Size).hasValue()); \ + } while (false) +#define EXPECT_INVALID_FRAGMENT(Offset, Size, ...) 
\ + do { \ + uint64_t Elements[] = {__VA_ARGS__}; \ + DIExpression* Expression = DIExpression::get(Context, Elements); \ + EXPECT_FALSE(DIExpression::createFragmentExpression( \ + Expression, Offset, Size).hasValue()); \ + } while (false) + + // createFragmentExpression adds correct ops. + Optional R = DIExpression::createFragmentExpression( + DIExpression::get(Context, {}), 0, 32); + EXPECT_EQ(R.hasValue(), true); + EXPECT_EQ(3u, (*R)->getNumElements()); + EXPECT_EQ(dwarf::DW_OP_LLVM_fragment, (*R)->getElement(0)); + EXPECT_EQ(0u, (*R)->getElement(1)); + EXPECT_EQ(32u, (*R)->getElement(2)); + + // Valid fragment expressions. + EXPECT_VALID_FRAGMENT(0, 32, {}); + EXPECT_VALID_FRAGMENT(0, 32, dwarf::DW_OP_deref); + EXPECT_VALID_FRAGMENT(0, 32, dwarf::DW_OP_LLVM_fragment, 0, 32); + EXPECT_VALID_FRAGMENT(16, 16, dwarf::DW_OP_LLVM_fragment, 0, 32); + + // Invalid fragment expressions (incompatible ops). + EXPECT_INVALID_FRAGMENT(0, 32, dwarf::DW_OP_constu, 6, dwarf::DW_OP_plus); + EXPECT_INVALID_FRAGMENT(0, 32, dwarf::DW_OP_constu, 14, dwarf::DW_OP_minus); + EXPECT_INVALID_FRAGMENT(0, 32, dwarf::DW_OP_constu, 16, dwarf::DW_OP_shr); + EXPECT_INVALID_FRAGMENT(0, 32, dwarf::DW_OP_constu, 16, dwarf::DW_OP_shl); + EXPECT_INVALID_FRAGMENT(0, 32, dwarf::DW_OP_constu, 16, dwarf::DW_OP_shra); + EXPECT_INVALID_FRAGMENT(0, 32, dwarf::DW_OP_plus_uconst, 6); + +#undef EXPECT_VALID_FRAGMENT +#undef EXPECT_INVALID_FRAGMENT +} + typedef MetadataTest DIObjCPropertyTest; TEST_F(DIObjCPropertyTest, get) { From 549db744bde29c8331411a4b41607a33c363c108 Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 21 Nov 2019 14:06:54 +0000 Subject: [PATCH 056/591] [ARM] Lots of MVE offset masked load and store tests. 
NFC --- .../CodeGen/Thumb2/mve-masked-ldst-offset.ll | 2646 ++++++++++++++++ .../CodeGen/Thumb2/mve-masked-ldst-postinc.ll | 2726 +++++++++++++++++ .../CodeGen/Thumb2/mve-masked-ldst-preinc.ll | 2726 +++++++++++++++++ llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll | 118 +- 4 files changed, 8157 insertions(+), 59 deletions(-) create mode 100644 llvm/test/CodeGen/Thumb2/mve-masked-ldst-offset.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-masked-ldst-postinc.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-masked-ldst-preinc.ll diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst-offset.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-offset.ll new file mode 100644 index 0000000000000..ba3ef58c3c2eb --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-offset.ll @@ -0,0 +1,2646 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE +; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE + +define i8* @ldrwu32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0, #4] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwu32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_3: +; CHECK: @ 
%bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: adds r3, r0, #3 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwu32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: adds r3, r0, #2 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwu32_508(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0, #508] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 508 + %0 = bitcast i8* %z to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwu32_512(i8* %x, 
i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: add.w r3, r0, #512 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 512 + %0 = bitcast i8* %z to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwu32_m508(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_m508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0, #-508] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -508 + %0 = bitcast i8* %z to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwu32_m512(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_m512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: sub.w r3, r0, #512 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -512 + %0 = bitcast i8* %z to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 
x i32> %1, <4 x i32>* %2, align 4 + ret i8* %x +} + +define i8* @ldrhu32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r0, #4] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhu32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: adds r3, r0, #3 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhu32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r0, #2] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> 
@llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhu32_254(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r0, #254] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 254 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhu32_256(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: add.w r3, r0, #256 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 256 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhu32_m254(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r0, #-254] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* 
%x, i32 -254 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhu32_m256(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: sub.w r3, r0, #256 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhs32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r0, #4] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhs32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: 
adds r3, r0, #3 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhs32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r0, #2] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhs32_254(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r0, #254] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 254 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* 
@ldrhs32_256(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: add.w r3, r0, #256 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 256 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhs32_m254(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r0, #-254] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -254 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhs32_m256(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: sub.w r3, r0, #256 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> 
@llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrhu16_4(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #4] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v4i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhu16_3(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: adds r3, r0, #3 +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v4i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhu16_2(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #2] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = 
icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v4i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhu16_254(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #254] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 254 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v4i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhu16_256(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: add.w r3, r0, #256 +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 256 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v4i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhu16_m254(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #-254] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -254 + %0 = bitcast i8* %z to <8 x i16>* + 
%mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v4i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhu16_m256(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: sub.w r3, r0, #256 +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v4i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %x +} + +define i8* @ldrbu32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0, #4] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbu32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0, #3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = 
getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbu32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0, #2] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbu32_127(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0, #127] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 127 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbu32_128(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: add.w r3, r0, #128 +; 
CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 128 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbu32_m127(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0, #-127] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbu32_m128(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: sub.w r3, r0, #128 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define 
i8* @ldrbs32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0, #4] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbs32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0, #3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbs32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0, #2] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast 
i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbs32_127(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0, #127] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 127 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbs32_128(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: add.w r3, r0, #128 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 128 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbs32_m127(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0, #-127] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 
= call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbs32_m128(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: sub.w r3, r0, #128 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %x +} + +define i8* @ldrbu16_4(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0, #4] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbu16_3(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0, #3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = 
bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbu16_2(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0, #2] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbu16_127(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0, #127] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 127 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbu16_128(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: add.w r3, r0, #128 +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: 
vldrbt.u16 q0, [r3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 128 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbu16_m127(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0, #-127] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbu16_m128(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: sub.w r3, r0, #128 +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbs16_4(i8* %x, i8* %y, <8 x i16> *%m) 
{ +; CHECK-LABEL: ldrbs16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0, #4] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbs16_3(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0, #3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbs16_2(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0, #2] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 
x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbs16_127(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0, #127] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 127 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbs16_128(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: add.w r3, r0, #128 +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 128 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbs16_m127(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0, #-127] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> 
@llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbs16_m128(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: sub.w r3, r0, #128 +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %x +} + +define i8* @ldrbu8_4(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0, #4] +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %x +} + +define i8* @ldrbu8_3(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0, #3] +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, 
<16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %x +} + +define i8* @ldrbu8_2(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0, #2] +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %x +} + +define i8* @ldrbu8_127(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0, #127] +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 127 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %x +} + +define i8* @ldrbu8_128(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: add.w r3, r0, #128 +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r3] +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 128 + %0 = bitcast i8* %z to <16 x 
i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %x +} + +define i8* @ldrbu8_m127(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0, #-127] +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %x +} + +define i8* @ldrbu8_m128(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: sub.w r3, r0, #128 +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r3] +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %x +} + +define i8* @ldrwf32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0, #4] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* 
%x, i32 4 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwf32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: adds r3, r0, #3 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwf32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: adds r3, r0, #2 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwf32_508(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0, #508] 
+; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 508 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwf32_512(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: add.w r3, r0, #512 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 512 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwf32_m508(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_m508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0, #-508] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -508 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %x +} + +define i8* @ldrwf32_m512(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_m512: +; CHECK: @ %bb.0: @ %entry +; 
CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: sub.w r3, r0, #512 +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r3] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -512 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %x +} + +define i8* @ldrhf16_4(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #4] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhf16_3(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: adds r3, r0, #3 +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhf16_2(i8* 
%x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #2] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhf16_254(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #254] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 254 + %0 = bitcast i8* %z to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhf16_256(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: add.w r3, r0, #256 +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 256 + %0 = bitcast i8* %z to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x 
half>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhf16_m254(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #-254] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -254 + %0 = bitcast i8* %z to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %x +} + +define i8* @ldrhf16_m256(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: sub.w r3, r0, #256 +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r3] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %z to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %x +} + + + + +define i8* @strw32_4(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0, #4] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void 
@llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %y +} + +define i8* @strw32_3(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: adds r1, r0, #3 +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %y +} + +define i8* @strw32_2(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: adds r1, r0, #2 +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %y +} + +define i8* @strw32_508(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0, #508] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 508 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to 
<4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %y +} + +define i8* @strw32_512(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: add.w r1, r0, #512 +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 512 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %y +} + +define i8* @strw32_m508(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_m508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0, #-508] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -508 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %y +} + +define i8* @strw32_m512(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_m512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: sub.w r1, r0, #512 +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -512 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, 
<4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %y +} + +define i8* @strh32_4(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0, #4] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %y +} + +define i8* @strh32_3(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: adds r1, r0, #3 +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %y +} + +define i8* @strh32_2(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0, #2] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* 
%0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %y +} + +define i8* @strh32_254(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0, #254] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 254 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %y +} + +define i8* @strh32_256(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: add.w r1, r0, #256 +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 256 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %y +} + +define i8* @strh32_m254(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0, #-254] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -254 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x 
i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %y +} + +define i8* @strh32_m256(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: sub.w r1, r0, #256 +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -256 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %y +} + +define i8* @strh16_4(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, #4] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %y +} + +define i8* @strh16_3(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: adds r1, r0, #3 +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, 
zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %y +} + +define i8* @strh16_2(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, #2] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %y +} + +define i8* @strh16_254(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, #254] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 254 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %y +} + +define i8* @strh16_256(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: add.w r1, r0, #256 +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 256 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> 
%mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %y +} + +define i8* @strh16_m254(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, #-254] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -254 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %y +} + +define i8* @strh16_m256(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: sub.w r1, r0, #256 +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -256 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %y +} + +define i8* @strb32_4(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0, #4] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne 
<4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %y +} + +define i8* @strb32_3(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0, #3] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %y +} + +define i8* @strb32_2(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0, #2] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %y +} + +define i8* @strb32_127(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0, #127] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 127 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, 
<4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %y +} + +define i8* @strb32_128(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: add.w r1, r0, #128 +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 128 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %y +} + +define i8* @strb32_m127(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0, #-127] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %y +} + +define i8* @strb32_m128(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: sub.w r1, r0, #128 +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer 
+ %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %y +} + +define i8* @strb16_4(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0, #4] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %y +} + +define i8* @strb16_3(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0, #3] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %y +} + +define i8* @strb16_2(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0, #2] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* 
%z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %y +} + +define i8* @strb16_127(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0, #127] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 127 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %y +} + +define i8* @strb16_128(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: add.w r1, r0, #128 +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 128 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %y +} + +define i8* @strb16_m127(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0, #-127] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 
x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %y +} + +define i8* @strb16_m128(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: sub.w r1, r0, #128 +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %y +} + +define i8* @strb8_4(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0, #4] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %z to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %y +} + +define i8* @strb8_3(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0, #3] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %z to <16 x i8>* + call void 
@llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %y +} + +define i8* @strb8_2(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0, #2] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %z to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %y +} + +define i8* @strb8_127(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0, #127] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 127 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %z to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %y +} + +define i8* @strb8_128(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: add.w r1, r0, #128 +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 128 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %z to <16 x i8>* + call void 
@llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %y +} + +define i8* @strb8_m127(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0, #-127] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %z to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %y +} + +define i8* @strb8_m128(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: sub.w r1, r0, #128 +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %z to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %y +} + +define i8* @strwf32_4(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0, #4] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %z to <4 x float>* 
+ call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %y +} + +define i8* @strwf32_3(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: adds r1, r0, #3 +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %z to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %y +} + +define i8* @strwf32_2(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: adds r1, r0, #2 +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %z to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %y +} + +define i8* @strwf32_508(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0, #508] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 508 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, 
<4 x float>* %0, align 4 + %2 = bitcast i8* %z to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %y +} + +define i8* @strwf32_512(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: add.w r1, r0, #512 +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 512 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %z to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %y +} + +define i8* @strwf32_m508(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_m508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0, #-508] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -508 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %z to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %y +} + +define i8* @strwf32_m512(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_m512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: sub.w r1, r0, #512 +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -512 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x 
i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %z to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %y +} + +define i8* @strhf16_4(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, #4] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %z to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %y +} + +define i8* @strhf16_3(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: adds r1, r0, #3 +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %z to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %y +} + +define i8* @strhf16_2(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, #2] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <8 x half>* + %mask = load 
<8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %z to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %y +} + +define i8* @strhf16_254(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, #254] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 254 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %z to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %y +} + +define i8* @strhf16_256(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: add.w r1, r0, #256 +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 256 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %z to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %y +} + +define i8* @strhf16_m254(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, #-254] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -254 + %0 = 
bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %z to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %y +} + +define i8* @strhf16_m256(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: sub.w r1, r0, #256 +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -256 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %z to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %y +} + +declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>) +declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32, <4 x i1>, <4 x i16>) +declare <8 x i16> @llvm.masked.load.v8i16.p0v4i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>) +declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32, <4 x i1>, <4 x i8>) +declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>) +declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>) +declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>) +declare <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>*, i32, <8 x i1>, <8 x half>) + +declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>) +declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>) +declare void @llvm.masked.store.v4i16.p0v4i16(<4 x i16>, <4 x i16>*, i32, <4 x i1>) 
+declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>) +declare void @llvm.masked.store.v8i8.p0v8i8(<8 x i8>, <8 x i8>*, i32, <8 x i1>) +declare void @llvm.masked.store.v4i8.p0v4i8(<4 x i8>, <4 x i8>*, i32, <4 x i1>) +declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>) +declare void @llvm.masked.store.v8f16.p0v8f16(<8 x half>, <8 x half>*, i32, <8 x i1>) diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst-postinc.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-postinc.ll new file mode 100644 index 0000000000000..69286c8777c02 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-postinc.ll @@ -0,0 +1,2726 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE +; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE + +define i8* @ldrwu32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwu32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; 
CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwu32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwu32_508(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: add.w r0, r0, #508 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 508 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwu32_512(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_512: +; CHECK: @ 
%bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: add.w r0, r0, #512 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 512 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwu32_m508(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_m508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: sub.w r0, r0, #508 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -508 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwu32_m512(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_m512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: sub.w r0, r0, #512 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -512 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + 
ret i8* %z +} + +define i8* @ldrhu32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r0] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhu32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhu32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r0] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> 
@llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhu32_254(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r0] +; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 254 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhu32_256(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r0] +; CHECK-NEXT: add.w r0, r0, #256 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 256 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhu32_m254(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r0] +; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr 
+entry: + %z = getelementptr inbounds i8, i8* %x, i32 -254 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhu32_m256(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r0] +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhs32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r0] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhs32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_3: +; CHECK: @ 
%bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhs32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r0] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhs32_254(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r0] +; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 254 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 
= bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhs32_256(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r0] +; CHECK-NEXT: add.w r0, r0, #256 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 256 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhs32_m254(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r0] +; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -254 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhs32_m256(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r0] +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x 
i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhu16_4(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhu16_3(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhu16_2(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr 
+entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhu16_254(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0] +; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 254 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhu16_256(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0] +; CHECK-NEXT: add.w r0, r0, #256 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 256 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhu16_m254(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: 
vldrht.u16 q0, [r0] +; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -254 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhu16_m256(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0] +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @ldrbu32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbu32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: 
ldrbu32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbu32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbu32_127(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0] +; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 127 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + 
%3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbu32_128(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0] +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 128 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbu32_m127(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0] +; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbu32_m128(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0] +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 
4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbs32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbs32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbs32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrw.32 q0, 
[r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbs32_127(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0] +; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 127 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbs32_128(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0] +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 128 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbs32_m127(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_m127: +; 
CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0] +; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbs32_m128(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0] +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbu16_4(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = 
bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbu16_3(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbu16_2(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbu16_127(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0] +; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 127 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> 
%mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbu16_128(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0] +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 128 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbu16_m127(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0] +; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbu16_m128(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0] +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vstrh.16 q0, 
[r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbs16_4(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbs16_3(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbs16_2(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_2: +; CHECK: @ %bb.0: @ 
%entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbs16_127(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0] +; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 127 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbs16_128(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0] +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 128 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to 
<8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbs16_m127(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0] +; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbs16_m128(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0] +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbu8_4(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, 
zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @ldrbu8_3(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @ldrbu8_2(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @ldrbu8_127(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0] +; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 127 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x 
i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @ldrbu8_128(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0] +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 128 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @ldrbu8_m127(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0] +; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @ldrbu8_m128(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0] +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr 
inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @ldrwf32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwf32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwf32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; 
CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwf32_508(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: add.w r0, r0, #508 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 508 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwf32_512(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: add.w r0, r0, #512 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 512 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwf32_m508(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: 
ldrwf32_m508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: sub.w r0, r0, #508 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -508 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwf32_m512(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_m512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: sub.w r0, r0, #512 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -512 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %z +} + +define i8* @ldrhf16_4(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + 
store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhf16_3(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhf16_2(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhf16_254(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0] +; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 254 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> 
@llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhf16_256(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0] +; CHECK-NEXT: add.w r0, r0, #256 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 256 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhf16_m254(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0] +; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -254 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhf16_m256(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0] +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %x to <8 
x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %z +} + + + + +define i8* @strw32_4(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %y to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strw32_3(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %y to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strw32_2(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr 
inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %y to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strw32_508(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: add.w r0, r0, #508 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 508 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %y to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strw32_512(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: add.w r0, r0, #512 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 512 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %y to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strw32_m508(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_m508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: 
sub.w r0, r0, #508 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -508 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %y to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strw32_m512(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_m512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: sub.w r0, r0, #512 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -512 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %y to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_4(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %y to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_3(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, 
q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %y to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_2(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %y to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_254(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0] +; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 254 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %y to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_256(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: 
vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0] +; CHECK-NEXT: add.w r0, r0, #256 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 256 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %y to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_m254(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0] +; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -254 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %y to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_m256(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0] +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -256 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %y to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh16_4(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_4: +; CHECK: @ 
%bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strh16_3(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strh16_2(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strh16_254(i8* %y, i8* %x, <8 x i16> *%m) { +; 
CHECK-LABEL: strh16_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 254 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strh16_256(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: add.w r0, r0, #256 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 256 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strh16_m254(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -254 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + 
+define i8* @strh16_m256(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -256 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %y to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb32_4(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %y to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb32_3(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %y to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 
x i1> %c) + ret i8* %z +} + +define i8* @strb32_2(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %y to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb32_127(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0] +; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 127 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %y to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb32_128(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0] +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 128 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %y to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 
x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb32_m127(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0] +; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %y to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb32_m128(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0] +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %y to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb16_4(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %y to <8 x i8>* + call void 
@llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb16_3(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %y to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb16_2(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %y to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb16_127(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0] +; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 127 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %y to <8 x i8>* 
+ call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb16_128(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0] +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 128 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %y to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb16_m127(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0] +; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %y to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb16_m128(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0] +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 
+ %2 = bitcast i8* %y to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb8_4(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %y to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strb8_3(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %y to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strb8_2(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, 
align 1 + %2 = bitcast i8* %y to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strb8_127(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0] +; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 127 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %y to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strb8_128(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0] +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 128 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %y to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strb8_m127(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0] +; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, 
zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %y to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strb8_m128(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0] +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %y to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_4(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %y to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_3(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, 
<4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %y to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_2(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %y to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_508(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: add.w r0, r0, #508 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 508 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %y to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_512(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: add.w r0, r0, #512 +; CHECK-NEXT: bx lr +entry: + %z = 
getelementptr inbounds i8, i8* %y, i32 512 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %y to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_m508(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_m508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: sub.w r0, r0, #508 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -508 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %y to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_m512(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_m512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: sub.w r0, r0, #512 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -512 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %y to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strhf16_4(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, 
q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %y to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strhf16_3(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %y to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strhf16_2(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %y to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strhf16_254(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; 
CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 254 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %y to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strhf16_256(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: add.w r0, r0, #256 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 256 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %y to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strhf16_m254(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -254 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %y to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strhf16_m256(i8* %y, i8* %x, <8 x i16> *%m) { +; 
CHECK-LABEL: strhf16_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -256 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %y to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>) +declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32, <4 x i1>, <4 x i16>) +declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>) +declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32, <4 x i1>, <4 x i8>) +declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>) +declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>) +declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>) +declare <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>*, i32, <8 x i1>, <8 x half>) + +declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>) +declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>) +declare void @llvm.masked.store.v4i16.p0v4i16(<4 x i16>, <4 x i16>*, i32, <4 x i1>) +declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>) +declare void @llvm.masked.store.v8i8.p0v8i8(<8 x i8>, <8 x i8>*, i32, <8 x i1>) +declare void @llvm.masked.store.v4i8.p0v4i8(<4 x i8>, <4 x i8>*, i32, <4 x i1>) +declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>) +declare void @llvm.masked.store.v8f16.p0v8f16(<8 x 
half>, <8 x half>*, i32, <8 x i1>) diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst-preinc.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-preinc.ll new file mode 100644 index 0000000000000..287446963ce66 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-preinc.ll @@ -0,0 +1,2726 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE +; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE + +define i8* @ldrwu32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0, #4] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwu32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, 
<4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwu32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwu32_508(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0, #508] +; CHECK-NEXT: add.w r0, r0, #508 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 508 + %0 = bitcast i8* %z to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwu32_512(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: add.w r0, r0, #512 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 512 + %0 = bitcast i8* %z to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, 
zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwu32_m508(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_m508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0, #-508] +; CHECK-NEXT: sub.w r0, r0, #508 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -508 + %0 = bitcast i8* %z to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwu32_m512(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwu32_m512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r0, r0, #512 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -512 + %0 = bitcast i8* %z to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef) + %2 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %1, <4 x i32>* %2, align 4 + ret i8* %z +} + +define i8* @ldrhu32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r0, #4] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = 
bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhu32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhu32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r0, #2] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhu32_254(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 
ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r0, #254] +; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 254 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhu32_256(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: add.w r0, r0, #256 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 256 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhu32_m254(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r0, #-254] +; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -254 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store 
<4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhu32_m256(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhu32_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.u32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = zext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhs32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r0, #4] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhs32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, 
zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhs32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r0, #2] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhs32_254(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r0, #254] +; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 254 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhs32_256(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: add.w r0, r0, #256 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r0] +; CHECK-NEXT: 
vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 256 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhs32_m254(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r0, #-254] +; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -254 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhs32_m256(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrhs32_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrht.s32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %z to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %c, <4 x i16> undef) + %2 = sext <4 x i16> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrhu16_4(i8* %x, i8* %y, 
<8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #4] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhu16_3(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhu16_2(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #2] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x 
i16>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhu16_254(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #254] +; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 254 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhu16_256(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: add.w r0, r0, #256 +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 256 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhu16_m254(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #-254] +; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -254 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 
x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhu16_m256(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhu16_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %z to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %c, <8 x i16> undef) + %2 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %1, <8 x i16>* %2, align 2 + ret i8* %z +} + +define i8* @ldrbu32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0, #4] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbu32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0, #3] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x 
i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbu32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0, #2] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbu32_127(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0, #127] +; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 127 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbu32_128(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 
q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 128 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbu32_m127(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0, #-127] +; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbu32_m128(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbu32_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.u32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = zext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbs32_4(i8* %x, i8* %y, 
<4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0, #4] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbs32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0, #3] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbs32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0, #2] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + 
%2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbs32_127(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0, #127] +; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 127 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbs32_128(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 128 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbs32_m127(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0, #-127] +; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <4 x i8>* 
+ %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbs32_m128(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrbs32_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrbt.s32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %c, <4 x i8> undef) + %2 = sext <4 x i8> %1 to <4 x i32> + %3 = bitcast i8* %y to <4 x i32>* + store <4 x i32> %2, <4 x i32>* %3, align 4 + ret i8* %z +} + +define i8* @ldrbu16_4(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0, #4] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbu16_3(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, 
[r0, #3] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbu16_2(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0, #2] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbu16_127(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0, #127] +; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 127 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* 
@ldrbu16_128(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 128 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbu16_m127(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0, #-127] +; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbu16_m128(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbu16_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.u16 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> 
@llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = zext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbs16_4(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0, #4] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbs16_3(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0, #3] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbs16_2(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0, #2] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr 
inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbs16_127(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0, #127] +; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 127 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbs16_128(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 128 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbs16_m127(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 
q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0, #-127] +; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbs16_m128(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrbs16_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrbt.s16 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %c, <8 x i8> undef) + %2 = sext <8 x i8> %1 to <8 x i16> + %3 = bitcast i8* %y to <8 x i16>* + store <8 x i16> %2, <8 x i16>* %3, align 2 + ret i8* %z +} + +define i8* @ldrbu8_4(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0, #4] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + 
ret i8* %z +} + +define i8* @ldrbu8_3(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0, #3] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @ldrbu8_2(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0, #2] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @ldrbu8_127(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0, #127] +; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 127 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = 
bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @ldrbu8_128(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0] +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 128 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @ldrbu8_m127(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0, #-127] +; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -127 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @ldrbu8_m128(i8* %x, i8* %y, <16 x i8> *%m) { +; CHECK-LABEL: ldrbu8_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: vpt.i8 ne, q0, zr +; CHECK-NEXT: vldrbt.u8 q0, [r0] +; CHECK-NEXT: vstrb.8 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -128 + %0 = bitcast i8* %z to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = call <16 x i8> 
@llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %c, <16 x i8> undef) + %2 = bitcast i8* %y to <16 x i8>* + store <16 x i8> %1, <16 x i8>* %2, align 1 + ret i8* %z +} + +define i8* @ldrwf32_4(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0, #4] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwf32_3(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwf32_2(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <4 x float>* + %mask = load 
<4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwf32_508(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0, #508] +; CHECK-NEXT: add.w r0, r0, #508 +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 508 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwf32_512(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: add.w r0, r0, #512 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 512 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwf32_m508(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_m508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0, #-508] +; CHECK-NEXT: sub.w r0, r0, #508 
+; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -508 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %z +} + +define i8* @ldrwf32_m512(i8* %x, i8* %y, <4 x i32> *%m) { +; CHECK-LABEL: ldrwf32_m512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r0, r0, #512 +; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: vpt.i32 ne, q0, zr +; CHECK-NEXT: vldrwt.u32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -512 + %0 = bitcast i8* %z to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef) + %2 = bitcast i8* %y to <4 x float>* + store <4 x float> %1, <4 x float>* %2, align 4 + ret i8* %z +} + +define i8* @ldrhf16_4(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #4] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 4 + %0 = bitcast i8* %z to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhf16_3(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_3: +; CHECK: @ %bb.0: @ %entry +; 
CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 3 + %0 = bitcast i8* %z to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhf16_2(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #2] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 2 + %0 = bitcast i8* %z to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhf16_254(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #254] +; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 254 + %0 = bitcast i8* %z to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %z +} + +define 
i8* @ldrhf16_256(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: add.w r0, r0, #256 +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 256 + %0 = bitcast i8* %z to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhf16_m254(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0, #-254] +; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -254 + %0 = bitcast i8* %z to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %z +} + +define i8* @ldrhf16_m256(i8* %x, i8* %y, <8 x i16> *%m) { +; CHECK-LABEL: ldrhf16_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: vpt.i16 ne, q0, zr +; CHECK-NEXT: vldrht.u16 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r1] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %x, i32 -256 + %0 = bitcast i8* %z to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x 
i1> %c, <8 x half> undef) + %2 = bitcast i8* %y to <8 x half>* + store <8 x half> %1, <8 x half>* %2, align 2 + ret i8* %z +} + + + + +define i8* @strw32_4(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0, #4] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strw32_3(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strw32_2(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x 
i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strw32_508(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0, #508] +; CHECK-NEXT: add.w r0, r0, #508 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 508 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strw32_512(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: add.w r0, r0, #512 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 512 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strw32_m508(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_m508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0, #-508] +; CHECK-NEXT: sub.w r0, r0, #508 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -508 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, 
align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strw32_m512(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strw32_m512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r0, r0, #512 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -512 + %0 = bitcast i8* %x to <4 x i32>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = bitcast i8* %z to <4 x i32>* + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %1, <4 x i32>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_4(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0, #4] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_3(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 
x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_2(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0, #2] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_254(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0, #254] +; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 254 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_256(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: add.w r0, r0, #256 +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = 
getelementptr inbounds i8, i8* %y, i32 256 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_m254(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0, #-254] +; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -254 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh32_m256(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strh32_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: vldrh.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrht.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -256 + %0 = bitcast i8* %x to <4 x i16>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i16>, <4 x i16>* %0, align 2 + %2 = bitcast i8* %z to <4 x i16>* + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %1, <4 x i16>* %2, i32 2, <4 x i1> %c) + ret i8* %z +} + +define i8* @strh16_4(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, 
[r0, #4] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strh16_3(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strh16_2(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, #2] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strh16_254(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 
ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, #254] +; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 254 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strh16_256(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: add.w r0, r0, #256 +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 256 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strh16_m254(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, #-254] +; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -254 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strh16_m256(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strh16_m256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: 
sub.w r0, r0, #256 +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -256 + %0 = bitcast i8* %x to <8 x i16>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i16>, <8 x i16>* %0, align 2 + %2 = bitcast i8* %z to <8 x i16>* + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %1, <8 x i16>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb32_4(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0, #4] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb32_3(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0, #3] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb32_2(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_2: +; CHECK: @ 
%bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0, #2] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb32_127(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0, #127] +; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 127 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb32_128(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 128 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb32_m127(i8* %y, i8* %x, <4 x i32> *%m) { +; 
CHECK-LABEL: strb32_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0, #-127] +; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb32_m128(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strb32_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vldrb.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrbt.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <4 x i8>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x i8>, <4 x i8>* %0, align 1 + %2 = bitcast i8* %z to <4 x i8>* + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %1, <4 x i8>* %2, i32 1, <4 x i1> %c) + ret i8* %z +} + +define i8* @strb16_4(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0, #4] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* 
@strb16_3(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0, #3] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb16_2(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0, #2] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb16_127(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0, #127] +; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 127 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + 
ret i8* %z +} + +define i8* @strb16_128(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 128 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb16_m127(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0, #-127] +; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb16_m128(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strb16_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vldrb.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrbt.16 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <8 x i8>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = bitcast i8* %z to <8 x i8>* + call void @llvm.masked.store.v8i8.p0v8i8(<8 x 
i8> %1, <8 x i8>* %2, i32 1, <8 x i1> %c) + ret i8* %z +} + +define i8* @strb8_4(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0, #4] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %z to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strb8_3(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0, #3] +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %z to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strb8_2(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0, #2] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %z to <16 x i8>* + call void 
@llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strb8_127(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0, #127] +; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 127 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %z to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strb8_128(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #128 +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 128 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %z to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strb8_m127(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_m127: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0, #-127] +; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -127 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, 
align 1 + %2 = bitcast i8* %z to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strb8_m128(i8* %y, i8* %x, <16 x i8> *%m) { +; CHECK-LABEL: strb8_m128: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: subs r0, #128 +; CHECK-NEXT: vldrb.u8 q0, [r1] +; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vpt.i8 ne, q1, zr +; CHECK-NEXT: vstrbt.8 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -128 + %0 = bitcast i8* %x to <16 x i8>* + %mask = load <16 x i8>, <16 x i8>* %m, align 1 + %c = icmp ne <16 x i8> %mask, zeroinitializer + %1 = load <16 x i8>, <16 x i8>* %0, align 1 + %2 = bitcast i8* %z to <16 x i8>* + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %1, <16 x i8>* %2, i32 1, <16 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_4(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0, #4] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %z to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_3(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> 
%mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %z to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_2(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %z to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_508(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0, #508] +; CHECK-NEXT: add.w r0, r0, #508 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 508 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %z to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_512(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: add.w r0, r0, #512 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 512 + %0 = 
bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %z to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_m508(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_m508: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0, #-508] +; CHECK-NEXT: sub.w r0, r0, #508 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -508 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %z to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strwf32_m512(i8* %y, i8* %x, <4 x i32> *%m) { +; CHECK-LABEL: strwf32_m512: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r0, r0, #512 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vpt.i32 ne, q1, zr +; CHECK-NEXT: vstrwt.32 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -512 + %0 = bitcast i8* %x to <4 x float>* + %mask = load <4 x i32>, <4 x i32>* %m, align 4 + %c = icmp ne <4 x i32> %mask, zeroinitializer + %1 = load <4 x float>, <4 x float>* %0, align 4 + %2 = bitcast i8* %z to <4 x float>* + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %1, <4 x float>* %2, i32 4, <4 x i1> %c) + ret i8* %z +} + +define i8* @strhf16_4(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_4: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, 
#4] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 4 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %z to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strhf16_3(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_3: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 3 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %z to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strhf16_2(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, #2] +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 2 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %z to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strhf16_254(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; 
CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, #254] +; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 254 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %z to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strhf16_256(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_256: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: add.w r0, r0, #256 +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 256 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %z to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strhf16_m254(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_m254: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0, #-254] +; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -254 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %z to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +define i8* @strhf16_m256(i8* %y, i8* %x, <8 x i16> *%m) { +; CHECK-LABEL: strhf16_m256: 
+; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub.w r0, r0, #256 +; CHECK-NEXT: vldrh.u16 q0, [r1] +; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vpt.i16 ne, q1, zr +; CHECK-NEXT: vstrht.16 q0, [r0] +; CHECK-NEXT: bx lr +entry: + %z = getelementptr inbounds i8, i8* %y, i32 -256 + %0 = bitcast i8* %x to <8 x half>* + %mask = load <8 x i16>, <8 x i16>* %m, align 2 + %c = icmp ne <8 x i16> %mask, zeroinitializer + %1 = load <8 x half>, <8 x half>* %0, align 2 + %2 = bitcast i8* %z to <8 x half>* + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %1, <8 x half>* %2, i32 2, <8 x i1> %c) + ret i8* %z +} + +declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>) +declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32, <4 x i1>, <4 x i16>) +declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>) +declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32, <4 x i1>, <4 x i8>) +declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>) +declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>) +declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>) +declare <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>*, i32, <8 x i1>, <8 x half>) + +declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>) +declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>) +declare void @llvm.masked.store.v4i16.p0v4i16(<4 x i16>, <4 x i16>*, i32, <4 x i1>) +declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>) +declare void @llvm.masked.store.v8i8.p0v8i8(<8 x i8>, <8 x i8>*, i32, <8 x i1>) +declare void @llvm.masked.store.v4i8.p0v4i8(<4 x i8>, <4 x i8>*, i32, <4 x i1>) +declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>) +declare void @llvm.masked.store.v8f16.p0v8f16(<8 x half>, <8 x half>*, i32, <8 x 
i1>) diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll index 46b64c8e4d8b1..100a082fd12be 100644 --- a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll +++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll @@ -13,8 +13,8 @@ define void @foo_v4i32_v4i32(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i32> *%src entry: %0 = load <4 x i32>, <4 x i32>* %mask, align 4 %1 = icmp sgt <4 x i32> %0, zeroinitializer - %2 = call <4 x i32> @llvm.masked.load.v4i32(<4 x i32>* %src, i32 4, <4 x i1> %1, <4 x i32> undef) - call void @llvm.masked.store.v4i32(<4 x i32> %2, <4 x i32>* %dest, i32 4, <4 x i1> %1) + %2 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %src, i32 4, <4 x i1> %1, <4 x i32> undef) + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %2, <4 x i32>* %dest, i32 4, <4 x i1> %1) ret void } @@ -29,9 +29,9 @@ define void @foo_sext_v4i32_v4i8(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i8> *% entry: %0 = load <4 x i32>, <4 x i32>* %mask, align 4 %1 = icmp sgt <4 x i32> %0, zeroinitializer - %2 = call <4 x i8> @llvm.masked.load.v4i8(<4 x i8>* %src, i32 1, <4 x i1> %1, <4 x i8> undef) + %2 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %src, i32 1, <4 x i1> %1, <4 x i8> undef) %3 = sext <4 x i8> %2 to <4 x i32> - call void @llvm.masked.store.v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1) + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1) ret void } @@ -46,9 +46,9 @@ define void @foo_sext_v4i32_v4i16(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i16> entry: %0 = load <4 x i32>, <4 x i32>* %mask, align 4 %1 = icmp sgt <4 x i32> %0, zeroinitializer - %2 = call <4 x i16> @llvm.masked.load.v4i16(<4 x i16>* %src, i32 2, <4 x i1> %1, <4 x i16> undef) + %2 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %src, i32 2, <4 x i1> %1, <4 x i16> undef) %3 = sext <4 x i16> %2 to <4 x i32> - call void @llvm.masked.store.v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 
x i1> %1) + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1) ret void } @@ -63,9 +63,9 @@ define void @foo_zext_v4i32_v4i8(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i8> *% entry: %0 = load <4 x i32>, <4 x i32>* %mask, align 4 %1 = icmp sgt <4 x i32> %0, zeroinitializer - %2 = call <4 x i8> @llvm.masked.load.v4i8(<4 x i8>* %src, i32 1, <4 x i1> %1, <4 x i8> undef) + %2 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %src, i32 1, <4 x i1> %1, <4 x i8> undef) %3 = zext <4 x i8> %2 to <4 x i32> - call void @llvm.masked.store.v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1) + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1) ret void } @@ -80,9 +80,9 @@ define void @foo_zext_v4i32_v4i16(<4 x i32> *%dest, <4 x i32> *%mask, <4 x i16> entry: %0 = load <4 x i32>, <4 x i32>* %mask, align 4 %1 = icmp sgt <4 x i32> %0, zeroinitializer - %2 = call <4 x i16> @llvm.masked.load.v4i16(<4 x i16>* %src, i32 2, <4 x i1> %1, <4 x i16> undef) + %2 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %src, i32 2, <4 x i1> %1, <4 x i16> undef) %3 = zext <4 x i16> %2 to <4 x i32> - call void @llvm.masked.store.v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1) + call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %dest, i32 4, <4 x i1> %1) ret void } @@ -234,9 +234,9 @@ define void @foo_sext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32> entry: %0 = load <2 x i32>, <2 x i32>* %mask, align 4 %1 = icmp sgt <2 x i32> %0, zeroinitializer - %2 = call <2 x i32> @llvm.masked.load.v2i32(<2 x i32>* %src, i32 4, <2 x i1> %1, <2 x i32> undef) + %2 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %src, i32 4, <2 x i1> %1, <2 x i32> undef) %3 = sext <2 x i32> %2 to <2 x i64> - call void @llvm.masked.store.v2i64(<2 x i64> %3, <2 x i64>* %dest, i32 8, <2 x i1> %1) + call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %3, <2 x i64>* %dest, i32 8, 
<2 x i1> %1) ret void } @@ -392,9 +392,9 @@ define void @foo_sext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask, entry: %0 = load <2 x i32>, <2 x i32>* %mask, align 4 %1 = icmp sgt <2 x i32> %0, zeroinitializer - %2 = call <2 x i32> @llvm.masked.load.v2i32(<2 x i32>* %src, i32 2, <2 x i1> %1, <2 x i32> undef) + %2 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %src, i32 2, <2 x i1> %1, <2 x i32> undef) %3 = sext <2 x i32> %2 to <2 x i64> - call void @llvm.masked.store.v2i64(<2 x i64> %3, <2 x i64>* %dest, i32 4, <2 x i1> %1) + call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %3, <2 x i64>* %dest, i32 4, <2 x i1> %1) ret void } @@ -549,9 +549,9 @@ define void @foo_zext_v2i64_v2i32(<2 x i64> *%dest, <2 x i32> *%mask, <2 x i32> entry: %0 = load <2 x i32>, <2 x i32>* %mask, align 4 %1 = icmp sgt <2 x i32> %0, zeroinitializer - %2 = call <2 x i32> @llvm.masked.load.v2i32(<2 x i32>* %src, i32 4, <2 x i1> %1, <2 x i32> undef) + %2 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %src, i32 4, <2 x i1> %1, <2 x i32> undef) %3 = zext <2 x i32> %2 to <2 x i64> - call void @llvm.masked.store.v2i64(<2 x i64> %3, <2 x i64>* %dest, i32 8, <2 x i1> %1) + call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %3, <2 x i64>* %dest, i32 8, <2 x i1> %1) ret void } @@ -710,9 +710,9 @@ define void @foo_zext_v2i64_v2i32_unaligned(<2 x i64> *%dest, <2 x i32> *%mask, entry: %0 = load <2 x i32>, <2 x i32>* %mask, align 4 %1 = icmp sgt <2 x i32> %0, zeroinitializer - %2 = call <2 x i32> @llvm.masked.load.v2i32(<2 x i32>* %src, i32 2, <2 x i1> %1, <2 x i32> undef) + %2 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %src, i32 2, <2 x i1> %1, <2 x i32> undef) %3 = zext <2 x i32> %2 to <2 x i64> - call void @llvm.masked.store.v2i64(<2 x i64> %3, <2 x i64>* %dest, i32 4, <2 x i1> %1) + call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %3, <2 x i64>* %dest, i32 4, <2 x i1> %1) ret void } @@ -727,8 +727,8 @@ define void @foo_v8i16_v8i16(<8 x i16> 
*%dest, <8 x i16> *%mask, <8 x i16> *%src entry: %0 = load <8 x i16>, <8 x i16>* %mask, align 2 %1 = icmp sgt <8 x i16> %0, zeroinitializer - %2 = call <8 x i16> @llvm.masked.load.v8i16(<8 x i16>* %src, i32 2, <8 x i1> %1, <8 x i16> undef) - call void @llvm.masked.store.v8i16(<8 x i16> %2, <8 x i16>* %dest, i32 2, <8 x i1> %1) + %2 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %src, i32 2, <8 x i1> %1, <8 x i16> undef) + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %2, <8 x i16>* %dest, i32 2, <8 x i1> %1) ret void } @@ -743,9 +743,9 @@ define void @foo_sext_v8i16_v8i8(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i8> *% entry: %0 = load <8 x i16>, <8 x i16>* %mask, align 2 %1 = icmp sgt <8 x i16> %0, zeroinitializer - %2 = call <8 x i8> @llvm.masked.load.v8i8(<8 x i8>* %src, i32 1, <8 x i1> %1, <8 x i8> undef) + %2 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %src, i32 1, <8 x i1> %1, <8 x i8> undef) %3 = sext <8 x i8> %2 to <8 x i16> - call void @llvm.masked.store.v8i16(<8 x i16> %3, <8 x i16>* %dest, i32 2, <8 x i1> %1) + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %3, <8 x i16>* %dest, i32 2, <8 x i1> %1) ret void } @@ -760,9 +760,9 @@ define void @foo_zext_v8i16_v8i8(<8 x i16> *%dest, <8 x i16> *%mask, <8 x i8> *% entry: %0 = load <8 x i16>, <8 x i16>* %mask, align 2 %1 = icmp sgt <8 x i16> %0, zeroinitializer - %2 = call <8 x i8> @llvm.masked.load.v8i8(<8 x i8>* %src, i32 1, <8 x i1> %1, <8 x i8> undef) + %2 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %src, i32 1, <8 x i1> %1, <8 x i8> undef) %3 = zext <8 x i8> %2 to <8 x i16> - call void @llvm.masked.store.v8i16(<8 x i16> %3, <8 x i16>* %dest, i32 2, <8 x i1> %1) + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %3, <8 x i16>* %dest, i32 2, <8 x i1> %1) ret void } @@ -777,8 +777,8 @@ define void @foo_v16i8_v16i8(<16 x i8> *%dest, <16 x i8> *%mask, <16 x i8> *%src entry: %0 = load <16 x i8>, <16 x i8>* %mask, align 1 %1 = icmp sgt <16 x i8> %0, 
zeroinitializer - %2 = call <16 x i8> @llvm.masked.load.v16i8(<16 x i8>* %src, i32 1, <16 x i1> %1, <16 x i8> undef) - call void @llvm.masked.store.v16i8(<16 x i8> %2, <16 x i8>* %dest, i32 1, <16 x i1> %1) + %2 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %src, i32 1, <16 x i1> %1, <16 x i8> undef) + call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %2, <16 x i8>* %dest, i32 1, <16 x i1> %1) ret void } @@ -793,9 +793,9 @@ define void @foo_trunc_v8i8_v8i16(<8 x i8> *%dest, <8 x i16> *%mask, <8 x i16> * entry: %0 = load <8 x i16>, <8 x i16>* %mask, align 2 %1 = icmp sgt <8 x i16> %0, zeroinitializer - %2 = call <8 x i16> @llvm.masked.load.v8i16(<8 x i16>* %src, i32 2, <8 x i1> %1, <8 x i16> undef) + %2 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %src, i32 2, <8 x i1> %1, <8 x i16> undef) %3 = trunc <8 x i16> %2 to <8 x i8> - call void @llvm.masked.store.v8i8(<8 x i8> %3, <8 x i8>* %dest, i32 1, <8 x i1> %1) + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %3, <8 x i8>* %dest, i32 1, <8 x i1> %1) ret void } @@ -810,9 +810,9 @@ define void @foo_trunc_v4i8_v4i32(<4 x i8> *%dest, <4 x i32> *%mask, <4 x i32> * entry: %0 = load <4 x i32>, <4 x i32>* %mask, align 4 %1 = icmp sgt <4 x i32> %0, zeroinitializer - %2 = call <4 x i32> @llvm.masked.load.v4i32(<4 x i32>* %src, i32 4, <4 x i1> %1, <4 x i32> undef) + %2 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %src, i32 4, <4 x i1> %1, <4 x i32> undef) %3 = trunc <4 x i32> %2 to <4 x i8> - call void @llvm.masked.store.v4i8(<4 x i8> %3, <4 x i8>* %dest, i32 1, <4 x i1> %1) + call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %3, <4 x i8>* %dest, i32 1, <4 x i1> %1) ret void } @@ -827,9 +827,9 @@ define void @foo_trunc_v4i16_v4i32(<4 x i16> *%dest, <4 x i32> *%mask, <4 x i32> entry: %0 = load <4 x i32>, <4 x i32>* %mask, align 4 %1 = icmp sgt <4 x i32> %0, zeroinitializer - %2 = call <4 x i32> @llvm.masked.load.v4i32(<4 x i32>* %src, i32 4, <4 x i1> %1, <4 x i32> undef) + %2 = 
call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %src, i32 4, <4 x i1> %1, <4 x i32> undef) %3 = trunc <4 x i32> %2 to <4 x i16> - call void @llvm.masked.store.v4i16(<4 x i16> %3, <4 x i16>* %dest, i32 2, <4 x i1> %1) + call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %3, <4 x i16>* %dest, i32 2, <4 x i1> %1) ret void } @@ -844,8 +844,8 @@ define void @foo_v4f32_v4f32(<4 x float> *%dest, <4 x i32> *%mask, <4 x float> * entry: %0 = load <4 x i32>, <4 x i32>* %mask, align 4 %1 = icmp sgt <4 x i32> %0, zeroinitializer - %2 = call <4 x float> @llvm.masked.load.v4f32(<4 x float>* %src, i32 4, <4 x i1> %1, <4 x float> undef) - call void @llvm.masked.store.v4f32(<4 x float> %2, <4 x float>* %dest, i32 4, <4 x i1> %1) + %2 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %src, i32 4, <4 x i1> %1, <4 x float> undef) + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %2, <4 x float>* %dest, i32 4, <4 x i1> %1) ret void } @@ -860,8 +860,8 @@ define void @foo_v8f16_v8f16(<8 x half> *%dest, <8 x i16> *%mask, <8 x half> *%s entry: %0 = load <8 x i16>, <8 x i16>* %mask, align 2 %1 = icmp sgt <8 x i16> %0, zeroinitializer - %2 = call <8 x half> @llvm.masked.load.v8f16(<8 x half>* %src, i32 2, <8 x i1> %1, <8 x half> undef) - call void @llvm.masked.store.v8f16(<8 x half> %2, <8 x half>* %dest, i32 2, <8 x i1> %1) + %2 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %src, i32 2, <8 x i1> %1, <8 x half> undef) + call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %2, <8 x half>* %dest, i32 2, <8 x i1> %1) ret void } @@ -991,9 +991,9 @@ define void @foo_v4f32_v4f16(<4 x float> *%dest, <4 x i16> *%mask, <4 x half> *% entry: %0 = load <4 x i16>, <4 x i16>* %mask, align 2 %1 = icmp sgt <4 x i16> %0, zeroinitializer - %2 = call <4 x half> @llvm.masked.load.v4f16(<4 x half>* %src, i32 2, <4 x i1> %1, <4 x half> undef) + %2 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* %src, i32 2, <4 x i1> %1, <4 x half> undef) %3 = fpext <4 
x half> %2 to <4 x float> - call void @llvm.masked.store.v4f32(<4 x float> %3, <4 x float>* %dest, i32 2, <4 x i1> %1) + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %3, <4 x float>* %dest, i32 2, <4 x i1> %1) ret void } @@ -1123,29 +1123,29 @@ define void @foo_v4f32_v4f16_unaligned(<4 x float> *%dest, <4 x i16> *%mask, <4 entry: %0 = load <4 x i16>, <4 x i16>* %mask, align 2 %1 = icmp sgt <4 x i16> %0, zeroinitializer - %2 = call <4 x half> @llvm.masked.load.v4f16(<4 x half>* %src, i32 2, <4 x i1> %1, <4 x half> undef) + %2 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* %src, i32 2, <4 x i1> %1, <4 x half> undef) %3 = fpext <4 x half> %2 to <4 x float> - call void @llvm.masked.store.v4f32(<4 x float> %3, <4 x float>* %dest, i32 1, <4 x i1> %1) + call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %3, <4 x float>* %dest, i32 1, <4 x i1> %1) ret void } -declare void @llvm.masked.store.v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>) -declare void @llvm.masked.store.v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>) -declare void @llvm.masked.store.v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>) -declare void @llvm.masked.store.v8f16(<8 x half>, <8 x half>*, i32, <8 x i1>) -declare void @llvm.masked.store.v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>) -declare <16 x i8> @llvm.masked.load.v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>) -declare <8 x i16> @llvm.masked.load.v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>) -declare <2 x i32> @llvm.masked.load.v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>) -declare <4 x i32> @llvm.masked.load.v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>) -declare <4 x float> @llvm.masked.load.v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>) -declare <4 x half> @llvm.masked.load.v4f16(<4 x half>*, i32, <4 x i1>, <4 x half>) -declare <8 x half> @llvm.masked.load.v8f16(<8 x half>*, i32, <8 x i1>, <8 x half>) +declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>) +declare void @llvm.masked.store.v8i16.p0v8i16(<8 
x i16>, <8 x i16>*, i32, <8 x i1>) +declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>) +declare void @llvm.masked.store.v8f16.p0v8f16(<8 x half>, <8 x half>*, i32, <8 x i1>) +declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>) +declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>) +declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>) +declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>) +declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>) +declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>) +declare <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>*, i32, <4 x i1>, <4 x half>) +declare <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>*, i32, <8 x i1>, <8 x half>) -declare void @llvm.masked.store.v8i8(<8 x i8>, <8 x i8>*, i32, <8 x i1>) -declare void @llvm.masked.store.v4i8(<4 x i8>, <4 x i8>*, i32, <4 x i1>) -declare void @llvm.masked.store.v4i16(<4 x i16>, <4 x i16>*, i32, <4 x i1>) -declare void @llvm.masked.store.v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>) -declare <4 x i16> @llvm.masked.load.v4i16(<4 x i16>*, i32, <4 x i1>, <4 x i16>) -declare <4 x i8> @llvm.masked.load.v4i8(<4 x i8>*, i32, <4 x i1>, <4 x i8>) -declare <8 x i8> @llvm.masked.load.v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>) +declare void @llvm.masked.store.v8i8.p0v8i8(<8 x i8>, <8 x i8>*, i32, <8 x i1>) +declare void @llvm.masked.store.v4i8.p0v4i8(<4 x i8>, <4 x i8>*, i32, <4 x i1>) +declare void @llvm.masked.store.v4i16.p0v4i16(<4 x i16>, <4 x i16>*, i32, <4 x i1>) +declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>) +declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32, <4 x i1>, <4 x i16>) +declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32, <4 x i1>, <4 x i8>) +declare <8 x i8> 
@llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32, <8 x i1>, <8 x i8>) From b5315ae8ffa6fb8befdd558d0dfd04295dbc5523 Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 21 Nov 2019 14:56:37 +0000 Subject: [PATCH 057/591] [Codegen][ARM] Add addressing modes from masked loads and stores MVE has a basic symmetry between it's normal loads/store operations and the masked variants. This means that masked loads and stores can use pre-inc and post-inc addressing modes, just like the standard loads and stores already do. To enable that, this patch adds all the relevant infrastructure for treating masked loads/stores addressing modes in the same way as normal loads/stores. This involves: - Adding an AddressingMode to MaskedLoadStoreSDNode, along with an extra Offset operand that is added after the PtrBase. - Extending the IndexedModeActions from 8bits to 16bits to store the legality of masked operations as well as normal ones. This array is fairly small, so doubling the size still won't make it very large. Offset masked loads can then be controlled with setIndexedMaskedLoadAction, similar to standard loads. - The same methods that combine to indexed loads, such as CombineToPostIndexedLoadStore, are adjusted to handle masked loads in the same way. - The ARM backend is then adjusted to make use of these indexed masked loads/stores. - The X86 backend is adjusted to hopefully be no functional changes. 
Differential Revision: https://reviews.llvm.org/D70176 --- llvm/include/llvm/CodeGen/SelectionDAG.h | 19 +- llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 53 ++-- llvm/include/llvm/CodeGen/TargetLowering.h | 114 ++++++--- .../include/llvm/Target/TargetSelectionDAG.td | 10 +- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 177 ++++++++----- .../SelectionDAG/LegalizeIntegerTypes.cpp | 16 +- .../SelectionDAG/LegalizeVectorTypes.cpp | 36 ++- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 63 ++++- .../SelectionDAG/SelectionDAGBuilder.cpp | 13 +- .../SelectionDAG/SelectionDAGDumper.cpp | 8 + llvm/lib/CodeGen/TargetLoweringBase.cpp | 2 + llvm/lib/Target/AArch64/AArch64InstrInfo.td | 27 +- llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 118 ++++++--- llvm/lib/Target/ARM/ARMISelLowering.cpp | 74 ++++-- llvm/lib/Target/ARM/ARMInstrMVE.td | 118 ++++++++- llvm/lib/Target/X86/X86ISelLowering.cpp | 44 ++-- llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 35 ++- .../cond-vector-reduce-mve-codegen.ll | 2 +- .../Thumb2/LowOverheadLoops/fast-fp-loops.ll | 19 +- .../LowOverheadLoops/mve-tail-data-types.ll | 59 ++--- .../LowOverheadLoops/vector-arith-codegen.ll | 49 ++-- .../CodeGen/Thumb2/mve-masked-ldst-postinc.ll | 240 ++++++------------ .../CodeGen/Thumb2/mve-masked-ldst-preinc.ll | 240 ++++++------------ llvm/test/CodeGen/Thumb2/mve-masked-load.ll | 60 ++--- llvm/test/CodeGen/Thumb2/mve-masked-store.ll | 60 ++--- 25 files changed, 907 insertions(+), 749 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index a0e37a19b37dd..8387e9a0e61db 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1136,14 +1136,19 @@ class SelectionDAG { /// Returns sum of the base pointer and offset. 
SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, const SDLoc &DL); - SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, - SDValue Mask, SDValue Src0, EVT MemVT, - MachineMemOperand *MMO, ISD::LoadExtType, - bool IsExpanding = false); + SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, + SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, + MachineMemOperand *MMO, ISD::MemIndexedMode AM, + ISD::LoadExtType, bool IsExpanding = false); + SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, + SDValue Offset, ISD::MemIndexedMode AM); SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, - SDValue Ptr, SDValue Mask, EVT MemVT, - MachineMemOperand *MMO, bool IsTruncating = false, - bool IsCompressing = false); + SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, + MachineMemOperand *MMO, ISD::MemIndexedMode AM, + bool IsTruncating = false, bool IsCompressing = false); + SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM); SDValue getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType); diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 3b799f967318a..e18278f8cdc61 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -553,6 +553,7 @@ BEGIN_TWO_BYTE_PACK() class LSBaseSDNodeBitfields { friend class LSBaseSDNode; + friend class MaskedLoadStoreSDNode; friend class MaskedGatherScatterSDNode; uint16_t : NumMemSDNodeBits; @@ -560,6 +561,7 @@ BEGIN_TWO_BYTE_PACK() // This storage is shared between disparate class hierarchies to hold an // enumeration specific to the class hierarchy in use. 
// LSBaseSDNode => enum ISD::MemIndexedMode + // MaskedLoadStoreBaseSDNode => enum ISD::MemIndexedMode // MaskedGatherScatterSDNode => enum ISD::MemIndexType uint16_t AddressingMode : 3; }; @@ -2273,19 +2275,38 @@ class MaskedLoadStoreSDNode : public MemSDNode { friend class SelectionDAG; MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, - const DebugLoc &dl, SDVTList VTs, EVT MemVT, + const DebugLoc &dl, SDVTList VTs, + ISD::MemIndexedMode AM, EVT MemVT, MachineMemOperand *MMO) - : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {} + : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { + LSBaseSDNodeBits.AddressingMode = AM; + assert(getAddressingMode() == AM && "Value truncated"); + } - // MaskedLoadSDNode (Chain, ptr, mask, passthru) - // MaskedStoreSDNode (Chain, data, ptr, mask) + // MaskedLoadSDNode (Chain, ptr, offset, mask, passthru) + // MaskedStoreSDNode (Chain, data, ptr, offset, mask) // Mask is a vector of i1 elements const SDValue &getBasePtr() const { return getOperand(getOpcode() == ISD::MLOAD ? 1 : 2); } - const SDValue &getMask() const { + const SDValue &getOffset() const { return getOperand(getOpcode() == ISD::MLOAD ? 2 : 3); } + const SDValue &getMask() const { + return getOperand(getOpcode() == ISD::MLOAD ? 3 : 4); + } + + /// Return the addressing mode for this load or store: + /// unindexed, pre-inc, pre-dec, post-inc, or post-dec. + ISD::MemIndexedMode getAddressingMode() const { + return static_cast(LSBaseSDNodeBits.AddressingMode); + } + + /// Return true if this is a pre/post inc/dec load/store. + bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; } + + /// Return true if this is NOT a pre/post inc/dec load/store. 
+ bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; } static bool classof(const SDNode *N) { return N->getOpcode() == ISD::MLOAD || @@ -2299,9 +2320,9 @@ class MaskedLoadSDNode : public MaskedLoadStoreSDNode { friend class SelectionDAG; MaskedLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, - ISD::LoadExtType ETy, bool IsExpanding, EVT MemVT, - MachineMemOperand *MMO) - : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, MemVT, MMO) { + ISD::MemIndexedMode AM, ISD::LoadExtType ETy, + bool IsExpanding, EVT MemVT, MachineMemOperand *MMO) + : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, AM, MemVT, MMO) { LoadSDNodeBits.ExtTy = ETy; LoadSDNodeBits.IsExpanding = IsExpanding; } @@ -2311,8 +2332,9 @@ class MaskedLoadSDNode : public MaskedLoadStoreSDNode { } const SDValue &getBasePtr() const { return getOperand(1); } - const SDValue &getMask() const { return getOperand(2); } - const SDValue &getPassThru() const { return getOperand(3); } + const SDValue &getOffset() const { return getOperand(2); } + const SDValue &getMask() const { return getOperand(3); } + const SDValue &getPassThru() const { return getOperand(4); } static bool classof(const SDNode *N) { return N->getOpcode() == ISD::MLOAD; @@ -2327,9 +2349,9 @@ class MaskedStoreSDNode : public MaskedLoadStoreSDNode { friend class SelectionDAG; MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, - bool isTrunc, bool isCompressing, EVT MemVT, - MachineMemOperand *MMO) - : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, MemVT, MMO) { + ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing, + EVT MemVT, MachineMemOperand *MMO) + : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, AM, MemVT, MMO) { StoreSDNodeBits.IsTruncating = isTrunc; StoreSDNodeBits.IsCompressing = isCompressing; } @@ -2345,9 +2367,10 @@ class MaskedStoreSDNode : public MaskedLoadStoreSDNode { /// memory at base_addr. 
bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; } - const SDValue &getValue() const { return getOperand(1); } + const SDValue &getValue() const { return getOperand(1); } const SDValue &getBasePtr() const { return getOperand(2); } - const SDValue &getMask() const { return getOperand(3); } + const SDValue &getOffset() const { return getOperand(3); } + const SDValue &getMask() const { return getOperand(4); } static bool classof(const SDNode *N) { return N->getOpcode() == ISD::MSTORE; diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index fa84d0efbdea9..825cb712f7cac 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1110,12 +1110,8 @@ class TargetLoweringBase { /// Return how the indexed load should be treated: either it is legal, needs /// to be promoted to a larger size, needs to be expanded to some other code /// sequence, or the target has a custom expander for it. - LegalizeAction - getIndexedLoadAction(unsigned IdxMode, MVT VT) const { - assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && - "Table isn't big enough!"); - unsigned Ty = (unsigned)VT.SimpleTy; - return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] & 0xf0) >> 4); + LegalizeAction getIndexedLoadAction(unsigned IdxMode, MVT VT) const { + return getIndexedModeAction(IdxMode, VT, IMAB_Load); } /// Return true if the specified indexed load is legal on this target. @@ -1128,12 +1124,8 @@ class TargetLoweringBase { /// Return how the indexed store should be treated: either it is legal, needs /// to be promoted to a larger size, needs to be expanded to some other code /// sequence, or the target has a custom expander for it. 
- LegalizeAction - getIndexedStoreAction(unsigned IdxMode, MVT VT) const { - assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && - "Table isn't big enough!"); - unsigned Ty = (unsigned)VT.SimpleTy; - return (LegalizeAction)(IndexedModeActions[Ty][IdxMode] & 0x0f); + LegalizeAction getIndexedStoreAction(unsigned IdxMode, MVT VT) const { + return getIndexedModeAction(IdxMode, VT, IMAB_Store); } /// Return true if the specified indexed load is legal on this target. @@ -1143,6 +1135,34 @@ class TargetLoweringBase { getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom); } + /// Return how the indexed load should be treated: either it is legal, needs + /// to be promoted to a larger size, needs to be expanded to some other code + /// sequence, or the target has a custom expander for it. + LegalizeAction getIndexedMaskedLoadAction(unsigned IdxMode, MVT VT) const { + return getIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad); + } + + /// Return true if the specified indexed load is legal on this target. + bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const { + return VT.isSimple() && + (getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Legal || + getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Custom); + } + + /// Return how the indexed store should be treated: either it is legal, needs + /// to be promoted to a larger size, needs to be expanded to some other code + /// sequence, or the target has a custom expander for it. + LegalizeAction getIndexedMaskedStoreAction(unsigned IdxMode, MVT VT) const { + return getIndexedModeAction(IdxMode, VT, IMAB_MaskedStore); + } + + /// Return true if the specified indexed load is legal on this target. 
+ bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const { + return VT.isSimple() && + (getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Legal || + getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Custom); + } + /// Return how the condition code should be treated: either it is legal, needs /// to be expanded to some other code sequence, or the target has a custom /// expander for it. @@ -2030,13 +2050,8 @@ class TargetLoweringBase { /// /// NOTE: All indexed mode loads are initialized to Expand in /// TargetLowering.cpp - void setIndexedLoadAction(unsigned IdxMode, MVT VT, - LegalizeAction Action) { - assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE && - (unsigned)Action < 0xf && "Table isn't big enough!"); - // Load action are kept in the upper half. - IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0xf0; - IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action) <<4; + void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action) { + setIndexedModeAction(IdxMode, VT, IMAB_Load, Action); } /// Indicate that the specified indexed store does or does not work with the @@ -2044,13 +2059,28 @@ class TargetLoweringBase { /// /// NOTE: All indexed mode stores are initialized to Expand in /// TargetLowering.cpp - void setIndexedStoreAction(unsigned IdxMode, MVT VT, - LegalizeAction Action) { - assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE && - (unsigned)Action < 0xf && "Table isn't big enough!"); - // Store action are kept in the lower half. - IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0x0f; - IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action); + void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action) { + setIndexedModeAction(IdxMode, VT, IMAB_Store, Action); + } + + /// Indicate that the specified indexed masked load does or does not work with + /// the specified type and indicate what to do about it. 
+ /// + /// NOTE: All indexed mode masked loads are initialized to Expand in + /// TargetLowering.cpp + void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT, + LegalizeAction Action) { + setIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad, Action); + } + + /// Indicate that the specified indexed masked store does or does not work + /// with the specified type and indicate what to do about it. + /// + /// NOTE: All indexed mode masked stores are initialized to Expand in + /// TargetLowering.cpp + void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT, + LegalizeAction Action) { + setIndexedModeAction(IdxMode, VT, IMAB_MaskedStore, Action); } /// Indicate that the specified condition code is or isn't supported on the @@ -2763,13 +2793,13 @@ class TargetLoweringBase { /// truncating store of a specific value type and truncating type is legal. LegalizeAction TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]; - /// For each indexed mode and each value type, keep a pair of LegalizeAction + /// For each indexed mode and each value type, keep a quad of LegalizeAction /// that indicates how instruction selection should deal with the load / - /// store. + /// store / maskedload / maskedstore. /// /// The first dimension is the value_type for the reference. The second /// dimension represents the various modes for load store. - uint8_t IndexedModeActions[MVT::LAST_VALUETYPE][ISD::LAST_INDEXED_MODE]; + uint16_t IndexedModeActions[MVT::LAST_VALUETYPE][ISD::LAST_INDEXED_MODE]; /// For each condition code (ISD::CondCode) keep a LegalizeAction that /// indicates how instruction selection should deal with the condition code. @@ -2812,6 +2842,32 @@ class TargetLoweringBase { /// Set default libcall names and calling conventions. void InitLibcalls(const Triple &TT); + /// The bits of IndexedModeActions used to store the legalisation actions + /// We store the data as | ML | MS | L | S | each taking 4 bits. 
+ enum IndexedModeActionsBits { + IMAB_Store = 0, + IMAB_Load = 4, + IMAB_MaskedStore = 8, + IMAB_MaskedLoad = 12 + }; + + void setIndexedModeAction(unsigned IdxMode, MVT VT, unsigned Shift, + LegalizeAction Action) { + assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE && + (unsigned)Action < 0xf && "Table isn't big enough!"); + unsigned Ty = (unsigned)VT.SimpleTy; + IndexedModeActions[Ty][IdxMode] &= ~(0xf << Shift); + IndexedModeActions[Ty][IdxMode] |= ((uint16_t)Action) << Shift; + } + + LegalizeAction getIndexedModeAction(unsigned IdxMode, MVT VT, + unsigned Shift) const { + assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && + "Table isn't big enough!"); + unsigned Ty = (unsigned)VT.SimpleTy; + return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] >> Shift) & 0xf); + } + protected: /// Return true if the extension represented by \p I is free. /// \pre \p I is a sign, zero, or fp extension and diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 441f3d7d118d1..9543086c4da72 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -224,13 +224,13 @@ def SDTIStore : SDTypeProfile<1, 3, [ // indexed store SDTCisSameAs<0, 2>, SDTCisPtrTy<0>, SDTCisPtrTy<3> ]>; -def SDTMaskedStore: SDTypeProfile<0, 3, [ // masked store - SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameNumEltsAs<0, 2> +def SDTMaskedStore: SDTypeProfile<0, 4, [ // masked store + SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisPtrTy<2>, SDTCisVec<3>, SDTCisSameNumEltsAs<0, 3> ]>; -def SDTMaskedLoad: SDTypeProfile<1, 3, [ // masked load - SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameAs<0, 3>, - SDTCisSameNumEltsAs<0, 2> +def SDTMaskedLoad: SDTypeProfile<1, 4, [ // masked load + SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisPtrTy<2>, SDTCisVec<3>, SDTCisSameAs<0, 4>, + SDTCisSameNumEltsAs<0, 3> ]>; def SDTVecShuffle : SDTypeProfile<1, 2, [ diff --git 
a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 793352c16d35a..e6844e556b11d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -8724,6 +8724,10 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return Chain; + // Try transforming N to an indexed store. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + return SDValue(); } @@ -8748,6 +8752,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return CombineTo(N, MLD->getPassThru(), MLD->getChain()); + // Try transforming N to an indexed load. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + return SDValue(); } @@ -9506,11 +9514,10 @@ static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, SDLoc dl(Ld); SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru()); - SDValue NewLoad = DAG.getMaskedLoad(VT, dl, Ld->getChain(), - Ld->getBasePtr(), Ld->getMask(), - PassThru, Ld->getMemoryVT(), - Ld->getMemOperand(), ExtLoadType, - Ld->isExpandingLoad()); + SDValue NewLoad = DAG.getMaskedLoad( + VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(), + PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(), + ExtLoadType, Ld->isExpandingLoad()); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1)); return NewLoad; } @@ -13612,12 +13619,22 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, EVT VT; unsigned AS; - if (LoadSDNode *LD = dyn_cast(Use)) { + if (LoadSDNode *LD = dyn_cast(Use)) { if (LD->isIndexed() || LD->getBasePtr().getNode() != N) return false; VT = LD->getMemoryVT(); AS = LD->getAddressSpace(); - } else if (StoreSDNode *ST = dyn_cast(Use)) { + } else if (StoreSDNode *ST = dyn_cast(Use)) { + if (ST->isIndexed() || 
ST->getBasePtr().getNode() != N) + return false; + VT = ST->getMemoryVT(); + AS = ST->getAddressSpace(); + } else if (MaskedLoadSDNode *LD = dyn_cast(Use)) { + if (LD->isIndexed() || LD->getBasePtr().getNode() != N) + return false; + VT = LD->getMemoryVT(); + AS = LD->getAddressSpace(); + } else if (MaskedStoreSDNode *ST = dyn_cast(Use)) { if (ST->isIndexed() || ST->getBasePtr().getNode() != N) return false; VT = ST->getMemoryVT(); @@ -13651,38 +13668,64 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, VT.getTypeForEVT(*DAG.getContext()), AS); } -/// Try turning a load/store into a pre-indexed load/store when the base -/// pointer is an add or subtract and it has other uses besides the load/store. -/// After the transformation, the new indexed load/store has effectively folded -/// the add/subtract in and all of its other uses are redirected to the -/// new load/store. -bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { - if (Level < AfterLegalizeDAG) - return false; - - bool isLoad = true; - SDValue Ptr; - EVT VT; - if (LoadSDNode *LD = dyn_cast(N)) { +static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, + bool &IsLoad, bool &IsMasked, SDValue &Ptr, + const TargetLowering &TLI) { + if (LoadSDNode *LD = dyn_cast(N)) { if (LD->isIndexed()) return false; - VT = LD->getMemoryVT(); - if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) && - !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT)) + EVT VT = LD->getMemoryVT(); + if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT)) return false; Ptr = LD->getBasePtr(); - } else if (StoreSDNode *ST = dyn_cast(N)) { + } else if (StoreSDNode *ST = dyn_cast(N)) { if (ST->isIndexed()) return false; - VT = ST->getMemoryVT(); - if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) && - !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT)) + EVT VT = ST->getMemoryVT(); + if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT)) return false; Ptr = ST->getBasePtr(); - isLoad = false; 
+ IsLoad = false; + } else if (MaskedLoadSDNode *LD = dyn_cast(N)) { + if (LD->isIndexed()) + return false; + EVT VT = LD->getMemoryVT(); + if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) && + !TLI.isIndexedMaskedLoadLegal(Dec, VT)) + return false; + Ptr = LD->getBasePtr(); + IsMasked = true; + } else if (MaskedStoreSDNode *ST = dyn_cast(N)) { + if (ST->isIndexed()) + return false; + EVT VT = ST->getMemoryVT(); + if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) && + !TLI.isIndexedMaskedStoreLegal(Dec, VT)) + return false; + Ptr = ST->getBasePtr(); + IsLoad = false; + IsMasked = true; } else { return false; } + return true; +} + +/// Try turning a load/store into a pre-indexed load/store when the base +/// pointer is an add or subtract and it has other uses besides the load/store. +/// After the transformation, the new indexed load/store has effectively folded +/// the add/subtract in and all of its other uses are redirected to the +/// new load/store. +bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { + if (Level < AfterLegalizeDAG) + return false; + + bool IsLoad = true; + bool IsMasked = false; + SDValue Ptr; + if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked, + Ptr, TLI)) + return false; // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail // out. There is no reason to make this a preinc/predec. @@ -13724,8 +13767,9 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { return false; // Check #2. - if (!isLoad) { - SDValue Val = cast(N)->getValue(); + if (!IsLoad) { + SDValue Val = IsMasked ? cast(N)->getValue() + : cast(N)->getValue(); // Would require a copy. 
if (Val == BasePtr) @@ -13801,18 +13845,26 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { return false; SDValue Result; - if (isLoad) - Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), - BasePtr, Offset, AM); - else - Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N), - BasePtr, Offset, AM); + if (!IsMasked) { + if (IsLoad) + Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM); + else + Result = + DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM); + } else { + if (IsLoad) + Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr, + Offset, AM); + else + Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr, + Offset, AM); + } ++PreIndexedNodes; ++NodesCombined; LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); - if (isLoad) { + if (IsLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); } else { @@ -13866,7 +13918,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // We can now generate the new expression. SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0)); - SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0); + SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0); SDValue NewUse = DAG.getNode(Opcode, DL, @@ -13876,7 +13928,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { } // Replace the uses of Ptr with uses of the updated base value. - DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0)); + DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 
1 : 0)); deleteAndRecombine(Ptr.getNode()); AddToWorklist(Result.getNode()); @@ -13891,29 +13943,12 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { if (Level < AfterLegalizeDAG) return false; - bool isLoad = true; + bool IsLoad = true; + bool IsMasked = false; SDValue Ptr; - EVT VT; - if (LoadSDNode *LD = dyn_cast(N)) { - if (LD->isIndexed()) - return false; - VT = LD->getMemoryVT(); - if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) && - !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT)) - return false; - Ptr = LD->getBasePtr(); - } else if (StoreSDNode *ST = dyn_cast(N)) { - if (ST->isIndexed()) - return false; - VT = ST->getMemoryVT(); - if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) && - !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT)) - return false; - Ptr = ST->getBasePtr(); - isLoad = false; - } else { + if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad, IsMasked, + Ptr, TLI)) return false; - } if (Ptr.getNode()->hasOneUse()) return false; @@ -13949,7 +13984,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // If all the uses are load / store addresses, then don't do the // transformation. - if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){ + if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) { bool RealUse = false; for (SDNode *UseUse : Use->uses()) { if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) @@ -13975,18 +14010,24 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { Worklist.push_back(Op); if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) && !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) { - SDValue Result = isLoad - ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), - BasePtr, Offset, AM) - : DAG.getIndexedStore(SDValue(N,0), SDLoc(N), - BasePtr, Offset, AM); + SDValue Result; + if (!IsMasked) + Result = IsLoad ? 
DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, + Offset, AM) + : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), + BasePtr, Offset, AM); + else + Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), + BasePtr, Offset, AM) + : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), + BasePtr, Offset, AM); ++PostIndexedNodes; ++NodesCombined; LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); - if (isLoad) { + if (IsLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); } else { @@ -13998,7 +14039,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // Replace the uses of Use with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), - Result.getValue(isLoad ? 1 : 0)); + Result.getValue(IsLoad ? 1 : 0)); deleteAndRecombine(Op); return true; } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 56c13bb0753d2..9f8da60eb9a6c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -592,8 +592,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) { SDLoc dl(N); SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(), - N->getMask(), ExtPassThru, N->getMemoryVT(), - N->getMemOperand(), ISD::EXTLOAD); + N->getOffset(), N->getMask(), ExtPassThru, + N->getMemoryVT(), N->getMemOperand(), + N->getAddressingMode(), ISD::EXTLOAD); // Legalize the chain result - switch anything that used the old chain to // use the new one. 
ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -1485,11 +1486,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, SDLoc dl(N); bool TruncateStore = false; - if (OpNo == 3) { + if (OpNo == 4) { Mask = PromoteTargetBoolean(Mask, DataVT); // Update in place. SmallVector NewOps(N->op_begin(), N->op_end()); - NewOps[3] = Mask; + NewOps[4] = Mask; return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } else { // Data operand assert(OpNo == 1 && "Unexpected operand for promotion"); @@ -1497,14 +1498,15 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, TruncateStore = true; } - return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask, - N->getMemoryVT(), N->getMemOperand(), + return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), + N->getOffset(), Mask, N->getMemoryVT(), + N->getMemOperand(), N->getAddressingMode(), TruncateStore, N->isCompressingStore()); } SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo) { - assert(OpNo == 2 && "Only know how to promote the mask!"); + assert(OpNo == 3 && "Only know how to promote the mask!"); EVT DataVT = N->getValueType(0); SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); SmallVector NewOps(N->op_begin(), N->op_end()); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 7bca3ea888ec4..9403b344ea747 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1541,12 +1541,15 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi) { + assert(MLD->isUnindexed() && "Indexed masked load during type legalization!"); EVT LoVT, HiVT; SDLoc dl(MLD); std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); SDValue Ch = MLD->getChain(); SDValue Ptr = 
MLD->getBasePtr(); + SDValue Offset = MLD->getOffset(); + assert(Offset.isUndef() && "Unexpected indexed masked load offset"); SDValue Mask = MLD->getMask(); SDValue PassThru = MLD->getPassThru(); unsigned Alignment = MLD->getOriginalAlignment(); @@ -1578,8 +1581,9 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); - Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, PassThruLo, LoMemVT, MMO, - ExtType, MLD->isExpandingLoad()); + Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT, + MMO, MLD->getAddressingMode(), ExtType, + MLD->isExpandingLoad()); Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG, MLD->isExpandingLoad()); @@ -1590,8 +1594,9 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, HiMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); - Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, PassThruHi, HiMemVT, MMO, - ExtType, MLD->isExpandingLoad()); + Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi, HiMemVT, + MMO, MLD->getAddressingMode(), ExtType, + MLD->isExpandingLoad()); // Build a factor node to remember that this load is independent of the // other one. 
@@ -2326,8 +2331,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo) { + assert(N->isUnindexed() && "Indexed masked store of vector?"); SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); + SDValue Offset = N->getOffset(); + assert(Offset.isUndef() && "Unexpected indexed masked store offset"); SDValue Mask = N->getMask(); SDValue Data = N->getValue(); EVT MemoryVT = N->getMemoryVT(); @@ -2361,8 +2369,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, - N->isTruncatingStore(), + Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO, + N->getAddressingMode(), N->isTruncatingStore(), N->isCompressingStore()); Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, @@ -2374,8 +2382,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, - N->isTruncatingStore(), N->isCompressingStore()); + Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO, + N->getAddressingMode(), N->isTruncatingStore(), + N->isCompressingStore()); // Build a factor node to remember that this store is independent of the // other one. 
@@ -3699,10 +3708,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { WidenVT.getVectorNumElements()); Mask = ModifyToType(Mask, WideMaskVT, true); - SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(), - Mask, PassThru, N->getMemoryVT(), - N->getMemOperand(), ExtType, - N->isExpandingLoad()); + SDValue Res = DAG.getMaskedLoad( + WidenVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, + PassThru, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(), + ExtType, N->isExpandingLoad()); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -4447,7 +4456,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { StVal.getValueType().getVectorNumElements() && "Mask and data vectors should have the same number of elements"); return DAG.getMaskedStore(MST->getChain(), dl, StVal, MST->getBasePtr(), - Mask, MST->getMemoryVT(), MST->getMemOperand(), + MST->getOffset(), Mask, MST->getMemoryVT(), + MST->getMemOperand(), MST->getAddressingMode(), false, MST->isCompressingStore()); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f1b88d80f43be..a20e43462f707 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -6975,16 +6975,22 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl, } SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, - SDValue Ptr, SDValue Mask, SDValue PassThru, - EVT MemVT, MachineMemOperand *MMO, + SDValue Base, SDValue Offset, SDValue Mask, + SDValue PassThru, EVT MemVT, + MachineMemOperand *MMO, + ISD::MemIndexedMode AM, ISD::LoadExtType ExtTy, bool isExpanding) { - SDVTList VTs = getVTList(VT, MVT::Other); - SDValue Ops[] = { Chain, Ptr, Mask, PassThru }; + bool Indexed = AM != ISD::UNINDEXED; + assert((Indexed 
|| Offset.isUndef()) && + "Unindexed masked load with an offset!"); + SDVTList VTs = Indexed ? getVTList(VT, Base.getValueType(), MVT::Other) + : getVTList(VT, MVT::Other); + SDValue Ops[] = {Chain, Base, Offset, Mask, PassThru}; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops); ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData( - dl.getIROrder(), VTs, ExtTy, isExpanding, MemVT, MMO)); + dl.getIROrder(), VTs, AM, ExtTy, isExpanding, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -6992,7 +6998,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, return SDValue(E, 0); } auto *N = newSDNode(dl.getIROrder(), dl.getDebugLoc(), VTs, - ExtTy, isExpanding, MemVT, MMO); + AM, ExtTy, isExpanding, MemVT, MMO); createOperands(N, Ops); CSEMap.InsertNode(N, IP); @@ -7002,27 +7008,45 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, return V; } +SDValue SelectionDAG::getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM) { + MaskedLoadSDNode *LD = cast(OrigLoad); + assert(LD->getOffset().isUndef() && "Masked load is already a indexed load!"); + return getMaskedLoad(OrigLoad.getValueType(), dl, LD->getChain(), Base, + Offset, LD->getMask(), LD->getPassThru(), + LD->getMemoryVT(), LD->getMemOperand(), AM, + LD->getExtensionType(), LD->isExpandingLoad()); +} + SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, - SDValue Val, SDValue Ptr, SDValue Mask, - EVT MemVT, MachineMemOperand *MMO, - bool IsTruncating, bool IsCompressing) { + SDValue Val, SDValue Base, SDValue Offset, + SDValue Mask, EVT MemVT, + MachineMemOperand *MMO, + ISD::MemIndexedMode AM, bool IsTruncating, + bool IsCompressing) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); - SDVTList VTs = getVTList(MVT::Other); - SDValue Ops[] = { 
Chain, Val, Ptr, Mask }; + bool Indexed = AM != ISD::UNINDEXED; + assert((Indexed || Offset.isUndef()) && + "Unindexed masked store with an offset!"); + SDVTList VTs = Indexed ? getVTList(Base.getValueType(), MVT::Other) + : getVTList(MVT::Other); + SDValue Ops[] = {Chain, Val, Base, Offset, Mask}; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops); ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData( - dl.getIROrder(), VTs, IsTruncating, IsCompressing, MemVT, MMO)); + dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast(E)->refineAlignment(MMO); return SDValue(E, 0); } - auto *N = newSDNode(dl.getIROrder(), dl.getDebugLoc(), VTs, - IsTruncating, IsCompressing, MemVT, MMO); + auto *N = + newSDNode(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, + IsTruncating, IsCompressing, MemVT, MMO); createOperands(N, Ops); CSEMap.InsertNode(N, IP); @@ -7032,6 +7056,17 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, return V; } +SDValue SelectionDAG::getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM) { + MaskedStoreSDNode *ST = cast(OrigStore); + assert(ST->getOffset().isUndef() && + "Masked store is already a indexed store!"); + return getMaskedStore(ST->getChain(), dl, ST->getValue(), Base, Offset, + ST->getMask(), ST->getMemoryVT(), ST->getMemOperand(), + AM, ST->isTruncatingStore(), ST->isCompressingStore()); +} + SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef Ops, MachineMemOperand *MMO, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 1ed0dc2c979fc..0aeb3c14aa370 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ 
-4295,6 +4295,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, SDValue Ptr = getValue(PtrOperand); SDValue Src0 = getValue(Src0Operand); SDValue Mask = getValue(MaskOperand); + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); EVT VT = Src0.getValueType(); if (!Alignment) @@ -4311,9 +4312,9 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, // vectors. VT.getStoreSize().getKnownMinSize(), Alignment, AAInfo); - SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT, - MMO, false /* Truncating */, - IsCompressing); + SDValue StoreNode = + DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO, + ISD::UNINDEXED, false /* Truncating */, IsCompressing); DAG.setRoot(StoreNode); setValue(&I, StoreNode); } @@ -4461,6 +4462,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { SDValue Ptr = getValue(PtrOperand); SDValue Src0 = getValue(Src0Operand); SDValue Mask = getValue(MaskOperand); + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); EVT VT = Src0.getValueType(); if (!Alignment) @@ -4491,8 +4493,9 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { VT.getStoreSize().getKnownMinSize(), Alignment, AAInfo, Ranges); - SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO, - ISD::NON_EXTLOAD, IsExpanding); + SDValue Load = + DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO, + ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding); if (AddToChain) PendingLoads.push_back(Load.getValue(1)); setValue(&I, Load); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index bc10f76212394..f863d9876486b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -685,6 +685,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (doExt) OS << " from " << 
MLd->getMemoryVT().getEVTString(); + const char *AM = getIndexedModeName(MLd->getAddressingMode()); + if (*AM) + OS << ", " << AM; + if (MLd->isExpandingLoad()) OS << ", expanding"; @@ -696,6 +700,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (MSt->isTruncatingStore()) OS << ", trunc to " << MSt->getMemoryVT().getEVTString(); + const char *AM = getIndexedModeName(MSt->getAddressingMode()); + if (*AM) + OS << ", " << AM; + if (MSt->isCompressingStore()) OS << ", compressing"; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index af7dc432eae5b..cc436fcc4f684 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -633,6 +633,8 @@ void TargetLoweringBase::initActions() { IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { setIndexedLoadAction(IM, VT, Expand); setIndexedStoreAction(IM, VT, Expand); + setIndexedMaskedLoadAction(IM, VT, Expand); + setIndexedMaskedStoreAction(IM, VT, Expand); } // Most backends expect to see the node which just returns the value loaded. diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 80cf31ff3d56b..ec84c1efbaf14 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -262,15 +262,17 @@ def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4, // non-extending masked load fragment. def nonext_masked_load : PatFrag<(ops node:$ptr, node:$pred, node:$def), - (masked_ld node:$ptr, node:$pred, node:$def), [{ - return cast(N)->getExtensionType() == ISD::NON_EXTLOAD; + (masked_ld node:$ptr, undef, node:$pred, node:$def), [{ + return cast(N)->getExtensionType() == ISD::NON_EXTLOAD && + cast(N)->isUnindexed(); }]>; // sign extending masked load fragments. 
def asext_masked_load : PatFrag<(ops node:$ptr, node:$pred, node:$def), - (masked_ld node:$ptr, node:$pred, node:$def),[{ - return cast(N)->getExtensionType() == ISD::EXTLOAD || - cast(N)->getExtensionType() == ISD::SEXTLOAD; + (masked_ld node:$ptr, undef, node:$pred, node:$def),[{ + return (cast(N)->getExtensionType() == ISD::EXTLOAD || + cast(N)->getExtensionType() == ISD::SEXTLOAD) && + cast(N)->isUnindexed(); }]>; def asext_masked_load_i8 : PatFrag<(ops node:$ptr, node:$pred, node:$def), @@ -290,8 +292,9 @@ def asext_masked_load_i32 : // zero extending masked load fragments. def zext_masked_load : PatFrag<(ops node:$ptr, node:$pred, node:$def), - (masked_ld node:$ptr, node:$pred, node:$def), [{ - return cast(N)->getExtensionType() == ISD::ZEXTLOAD; + (masked_ld node:$ptr, undef, node:$pred, node:$def), [{ + return cast(N)->getExtensionType() == ISD::ZEXTLOAD && + cast(N)->isUnindexed(); }]>; def zext_masked_load_i8 : PatFrag<(ops node:$ptr, node:$pred, node:$def), @@ -312,14 +315,16 @@ def zext_masked_load_i32 : // non-truncating masked store fragment. def nontrunc_masked_store : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (masked_st node:$val, node:$ptr, node:$pred), [{ - return !cast(N)->isTruncatingStore(); + (masked_st node:$val, node:$ptr, undef, node:$pred), [{ + return !cast(N)->isTruncatingStore() && + cast(N)->isUnindexed(); }]>; // truncating masked store fragments. 
def trunc_masked_store : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (masked_st node:$val, node:$ptr, node:$pred), [{ - return cast(N)->isTruncatingStore(); + (masked_st node:$val, node:$ptr, undef, node:$pred), [{ + return cast(N)->isTruncatingStore() && + cast(N)->isUnindexed(); }]>; def trunc_masked_store_i8 : PatFrag<(ops node:$val, node:$ptr, node:$pred), diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 46a2560e16745..a6b334938e179 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1351,11 +1351,27 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm, unsigned Shift) { unsigned Opcode = Op->getOpcode(); - ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) - ? cast(Op)->getAddressingMode() - : cast(Op)->getAddressingMode(); + ISD::MemIndexedMode AM; + switch (Opcode) { + case ISD::LOAD: + AM = cast(Op)->getAddressingMode(); + break; + case ISD::STORE: + AM = cast(Op)->getAddressingMode(); + break; + case ISD::MLOAD: + AM = cast(Op)->getAddressingMode(); + break; + case ISD::MSTORE: + AM = cast(Op)->getAddressingMode(); + break; + default: + llvm_unreachable("Unexpected Opcode for Imm7Offset"); + } + int RHSC; - if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { // 7 bits. + // 7 bit constant, shifted by Shift. + if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) ? 
CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32) @@ -1625,58 +1641,93 @@ bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) { } bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) { - LoadSDNode *LD = cast(N); - ISD::MemIndexedMode AM = LD->getAddressingMode(); - if (AM == ISD::UNINDEXED) - return false; - EVT LoadedVT = LD->getMemoryVT(); - if (!LoadedVT.isVector()) - return false; - bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; - SDValue Offset; - bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); + EVT LoadedVT; unsigned Opcode = 0; - unsigned Align = LD->getAlignment(); - bool IsLE = Subtarget->isLittle(); + bool isSExtLd, isPre; + unsigned Align; + ARMVCC::VPTCodes Pred; + SDValue PredReg; + SDValue Chain, Base, Offset; + + if (LoadSDNode *LD = dyn_cast(N)) { + ISD::MemIndexedMode AM = LD->getAddressingMode(); + if (AM == ISD::UNINDEXED) + return false; + LoadedVT = LD->getMemoryVT(); + if (!LoadedVT.isVector()) + return false; + + Chain = LD->getChain(); + Base = LD->getBasePtr(); + Offset = LD->getOffset(); + Align = LD->getAlignment(); + isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; + isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); + Pred = ARMVCC::None; + PredReg = CurDAG->getRegister(0, MVT::i32); + } else if (MaskedLoadSDNode *LD = dyn_cast(N)) { + ISD::MemIndexedMode AM = LD->getAddressingMode(); + if (AM == ISD::UNINDEXED) + return false; + LoadedVT = LD->getMemoryVT(); + if (!LoadedVT.isVector()) + return false; + Chain = LD->getChain(); + Base = LD->getBasePtr(); + Offset = LD->getOffset(); + Align = LD->getAlignment(); + isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; + isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); + Pred = ARMVCC::Then; + PredReg = LD->getMask(); + } else + llvm_unreachable("Expected a Load or a Masked Load!"); + + // We allow LE non-masked loads to change the type (for example use a vldrb.8 + // as opposed to a vldrw.32). 
This can allow extra addressing modes or + // alignments for what is otherwise an equivalent instruction. + bool CanChangeType = Subtarget->isLittle() && !isa(N); + + SDValue NewOffset; if (Align >= 2 && LoadedVT == MVT::v4i16 && - SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) { + SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) { if (isSExtLd) Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post; else Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post; } else if (LoadedVT == MVT::v8i8 && - SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) { + SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) { if (isSExtLd) Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post; else Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post; } else if (LoadedVT == MVT::v4i8 && - SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) { + SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) { if (isSExtLd) Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post; else Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post; } else if (Align >= 4 && - (IsLE || LoadedVT == MVT::v4i32 || LoadedVT == MVT::v4f32) && - SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 2)) + (CanChangeType || LoadedVT == MVT::v4i32 || + LoadedVT == MVT::v4f32) && + SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2)) Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post; else if (Align >= 2 && - (IsLE || LoadedVT == MVT::v8i16 || LoadedVT == MVT::v8f16) && - SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) + (CanChangeType || LoadedVT == MVT::v8i16 || + LoadedVT == MVT::v8f16) && + SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) Opcode = isPre ? 
ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post; - else if ((IsLE || LoadedVT == MVT::v16i8) && - SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) + else if ((CanChangeType || LoadedVT == MVT::v16i8) && + SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post; else return false; - SDValue Chain = LD->getChain(); - SDValue Base = LD->getBasePtr(); - SDValue Ops[] = {Base, Offset, - CurDAG->getTargetConstant(ARMVCC::None, SDLoc(N), MVT::i32), - CurDAG->getRegister(0, MVT::i32), Chain}; - SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), LD->getValueType(0), + SDValue Ops[] = {Base, NewOffset, + CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg, + Chain}; + SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), N->getValueType(0), MVT::i32, MVT::Other, Ops); transferMemOperands(N, New); ReplaceUses(SDValue(N, 0), SDValue(New, 1)); @@ -3292,6 +3343,11 @@ void ARMDAGToDAGISel::Select(SDNode *N) { // Other cases are autogenerated. break; } + case ISD::MLOAD: + if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) + return; + // Other cases are autogenerated. 
+ break; case ARMISD::WLS: case ARMISD::LE: { SDValue Ops[] = { N->getOperand(1), diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index e359756b7bf45..c153e786e2ddb 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -296,6 +296,8 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { setIndexedLoadAction(im, VT, Legal); setIndexedStoreAction(im, VT, Legal); + setIndexedMaskedLoadAction(im, VT, Legal); + setIndexedMaskedStoreAction(im, VT, Legal); } } @@ -322,6 +324,8 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { setIndexedLoadAction(im, VT, Legal); setIndexedStoreAction(im, VT, Legal); + setIndexedMaskedLoadAction(im, VT, Legal); + setIndexedMaskedStoreAction(im, VT, Legal); } if (HasMVEFP) { @@ -374,12 +378,12 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { // Pre and Post inc on these are legal, given the correct extends for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { - setIndexedLoadAction(im, MVT::v8i8, Legal); - setIndexedStoreAction(im, MVT::v8i8, Legal); - setIndexedLoadAction(im, MVT::v4i8, Legal); - setIndexedStoreAction(im, MVT::v4i8, Legal); - setIndexedLoadAction(im, MVT::v4i16, Legal); - setIndexedStoreAction(im, MVT::v4i16, Legal); + for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) { + setIndexedLoadAction(im, VT, Legal); + setIndexedStoreAction(im, VT, Legal); + setIndexedMaskedLoadAction(im, VT, Legal); + setIndexedMaskedStoreAction(im, VT, Legal); + } } // Predicate types @@ -9013,8 +9017,9 @@ static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) { SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT, DAG.getTargetConstant(0, dl, MVT::i32)); SDValue NewLoad = DAG.getMaskedLoad( - VT, dl, N->getChain(), N->getBasePtr(), Mask, ZeroVec, N->getMemoryVT(), - N->getMemOperand(), 
N->getExtensionType(), N->isExpandingLoad()); + VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, ZeroVec, + N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(), + N->getExtensionType(), N->isExpandingLoad()); SDValue Combo = NewLoad; if (!PassThru.isUndef() && (PassThru.getOpcode() != ISD::BITCAST || @@ -15192,14 +15197,19 @@ static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, } static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, unsigned Align, - bool isSEXTLoad, bool isLE, SDValue &Base, - SDValue &Offset, bool &isInc, - SelectionDAG &DAG) { + bool isSEXTLoad, bool IsMasked, bool isLE, + SDValue &Base, SDValue &Offset, + bool &isInc, SelectionDAG &DAG) { if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) return false; if (!isa(Ptr->getOperand(1))) return false; + // We allow LE non-masked loads to change the type (for example use a vldrb.8 + // as opposed to a vldrw.32). This can allow extra addressing modes or + // alignments for what is otherwise an equivalent instruction. + bool CanChangeType = isLE && !IsMasked; + ConstantSDNode *RHS = cast(Ptr->getOperand(1)); int RHSC = (int)RHS->getZExtValue(); @@ -15218,7 +15228,7 @@ static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, unsigned Align, }; // Try to find a matching instruction based on s/zext, Alignment, Offset and - // (in BE) type. + // (in BE/masked) type. 
Base = Ptr->getOperand(0); if (VT == MVT::v4i16) { if (Align >= 2 && IsInRange(RHSC, 0x80, 2)) @@ -15226,13 +15236,15 @@ static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, unsigned Align, } else if (VT == MVT::v4i8 || VT == MVT::v8i8) { if (IsInRange(RHSC, 0x80, 1)) return true; - } else if (Align >= 4 && (isLE || VT == MVT::v4i32 || VT == MVT::v4f32) && + } else if (Align >= 4 && + (CanChangeType || VT == MVT::v4i32 || VT == MVT::v4f32) && IsInRange(RHSC, 0x80, 4)) return true; - else if (Align >= 2 && (isLE || VT == MVT::v8i16 || VT == MVT::v8f16) && + else if (Align >= 2 && + (CanChangeType || VT == MVT::v8i16 || VT == MVT::v8f16) && IsInRange(RHSC, 0x80, 2)) return true; - else if ((isLE || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1)) + else if ((CanChangeType || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1)) return true; return false; } @@ -15252,6 +15264,7 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue Ptr; unsigned Align; bool isSEXTLoad = false; + bool IsMasked = false; if (LoadSDNode *LD = dyn_cast(N)) { Ptr = LD->getBasePtr(); VT = LD->getMemoryVT(); @@ -15261,6 +15274,17 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, Ptr = ST->getBasePtr(); VT = ST->getMemoryVT(); Align = ST->getAlignment(); + } else if (MaskedLoadSDNode *LD = dyn_cast(N)) { + Ptr = LD->getBasePtr(); + VT = LD->getMemoryVT(); + Align = LD->getAlignment(); + isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; + IsMasked = true; + } else if (MaskedStoreSDNode *ST = dyn_cast(N)) { + Ptr = ST->getBasePtr(); + VT = ST->getMemoryVT(); + Align = ST->getAlignment(); + IsMasked = true; } else return false; @@ -15269,8 +15293,8 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, if (VT.isVector()) isLegal = Subtarget->hasMVEIntegerOps() && getMVEIndexedAddressParts(Ptr.getNode(), VT, Align, isSEXTLoad, - Subtarget->isLittle(), Base, Offset, - isInc, DAG); + IsMasked, Subtarget->isLittle(), Base, + 
Offset, isInc, DAG); else { if (Subtarget->isThumb2()) isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, @@ -15298,6 +15322,7 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue Ptr; unsigned Align; bool isSEXTLoad = false, isNonExt; + bool IsMasked = false; if (LoadSDNode *LD = dyn_cast(N)) { VT = LD->getMemoryVT(); Ptr = LD->getBasePtr(); @@ -15309,6 +15334,19 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, Ptr = ST->getBasePtr(); Align = ST->getAlignment(); isNonExt = !ST->isTruncatingStore(); + } else if (MaskedLoadSDNode *LD = dyn_cast(N)) { + VT = LD->getMemoryVT(); + Ptr = LD->getBasePtr(); + Align = LD->getAlignment(); + isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; + isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD; + IsMasked = true; + } else if (MaskedStoreSDNode *ST = dyn_cast(N)) { + VT = ST->getMemoryVT(); + Ptr = ST->getBasePtr(); + Align = ST->getAlignment(); + isNonExt = !ST->isTruncatingStore(); + IsMasked = true; } else return false; @@ -15332,7 +15370,7 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, bool isLegal = false; if (VT.isVector()) isLegal = Subtarget->hasMVEIntegerOps() && - getMVEIndexedAddressParts(Op, VT, Align, isSEXTLoad, + getMVEIndexedAddressParts(Op, VT, Align, isSEXTLoad, IsMasked, Subtarget->isLittle(), Base, Offset, isInc, DAG); else { diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 429d0a1cf1bdf..dd8c032dae423 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -5332,6 +5332,10 @@ class MVE_vector_offset_store_typed : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset:$addr), (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset:$addr)>; +class MVE_vector_offset_maskedstore_typed + : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset:$addr, VCCR:$pred), + (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset:$addr, (i32 1), 
VCCR:$pred)>; multiclass MVE_vector_offset_store { @@ -5363,7 +5367,7 @@ def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), def maskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (masked_ld node:$ptr, node:$pred, node:$passthru), [{ + (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{ auto *Ld = cast(N); return Ld->getMemoryVT().getScalarType() == MVT::i8; }]>; @@ -5382,7 +5386,7 @@ def extmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD; }]>; def alignedmaskedload16: PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (masked_ld node:$ptr, node:$pred, node:$passthru), [{ + (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{ auto *Ld = cast(N); EVT ScalarVT = Ld->getMemoryVT().getScalarType(); return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && Ld->getAlignment() >= 2; @@ -5402,14 +5406,14 @@ def extmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD; }]>; def alignedmaskedload32: PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (masked_ld node:$ptr, node:$pred, node:$passthru), [{ + (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{ auto *Ld = cast(N); EVT ScalarVT = Ld->getMemoryVT().getScalarType(); return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && Ld->getAlignment() >= 4; }]>; def maskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (masked_st node:$val, node:$ptr, node:$pred), [{ + (masked_st node:$val, node:$ptr, undef, node:$pred), [{ return cast(N)->getMemoryVT().getScalarType() == MVT::i8; }]>; def truncatingmaskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred), @@ -5417,7 +5421,7 @@ def truncatingmaskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred), return cast(N)->isTruncatingStore(); }]>; def maskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred), - 
(masked_st node:$val, node:$ptr, node:$pred), [{ + (masked_st node:$val, node:$ptr, undef, node:$pred), [{ auto *St = cast(N); EVT ScalarVT = St->getMemoryVT().getScalarType(); return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; @@ -5428,12 +5432,41 @@ def truncatingmaskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred), return cast(N)->isTruncatingStore(); }]>; def maskedstore32 : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (masked_st node:$val, node:$ptr, node:$pred), [{ + (masked_st node:$val, node:$ptr, undef, node:$pred), [{ auto *St = cast(N); EVT ScalarVT = St->getMemoryVT().getScalarType(); return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4; }]>; + +def pre_maskedstore : PatFrag<(ops node:$val, node:$base, node:$offset, node:$mask), + (masked_st node:$val, node:$base, node:$offset, node:$mask), [{ + ISD::MemIndexedMode AM = cast(N)->getAddressingMode(); + return AM == ISD::PRE_INC || AM == ISD::PRE_DEC; +}]>; +def post_maskedstore : PatFrag<(ops node:$val, node:$base, node:$offset, node:$mask), + (masked_st node:$val, node:$base, node:$offset, node:$mask), [{ + ISD::MemIndexedMode AM = cast(N)->getAddressingMode(); + return AM == ISD::POST_INC || AM == ISD::POST_DEC; +}]>; +def aligned32_pre_maskedstore : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), + (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ + return cast(N)->getAlignment() >= 4; +}]>; +def aligned32_post_maskedstore : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), + (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ + return cast(N)->getAlignment() >= 4; +}]>; +def aligned16_pre_maskedstore : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), + (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ + return cast(N)->getAlignment() >= 2; +}]>; +def aligned16_post_maskedstore : PatFrag<(ops node:$val, node:$ptr, node:$offset, 
node:$mask), + (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ + return cast(N)->getAlignment() >= 2; +}]>; + + let Predicates = [HasMVEInt, IsLE] in { // Stores defm : MVE_vector_store; @@ -5515,19 +5548,26 @@ let Predicates = [HasMVEInt] in { def : MVE_vector_maskedstore_typed; def : MVE_vector_maskedstore_typed; def : MVE_vector_maskedstore_typed; - // Truncating stores - def : Pat<(truncatingmaskedstore8 (v8i16 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred), - (MVE_VSTRB16 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; - def : Pat<(truncatingmaskedstore8 (v4i32 MQPR:$val), t2addrmode_imm7<0>:$addr, VCCR:$pred), - (MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; - def : Pat<(truncatingmaskedstore16 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr, VCCR:$pred), - (MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred)>; + + // Pre/Post inc masked stores + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + // Aligned masked loads def : MVE_vector_maskedload_typed; def : MVE_vector_maskedload_typed; def : MVE_vector_maskedload_typed; def : MVE_vector_maskedload_typed; def : MVE_vector_maskedload_typed; + // Extending masked loads. 
def : Pat<(v8i16 (sextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, (v8i16 NEONimmAllZerosV))), @@ -5569,6 +5609,37 @@ let MinAlignment = 2 in { (pre_truncstvi16 node:$val, node:$base, node:$offset)>; } +def pre_truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred), + (masked_st node:$val, node:$base, node:$offset, node:$pred), [{ + ISD::MemIndexedMode AM = cast(N)->getAddressingMode(); + return AM == ISD::PRE_INC || AM == ISD::PRE_DEC; +}]>; +def pre_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred), + (pre_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def pre_truncmaskedstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred), + (pre_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{ + auto *St = cast(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; +}]>; +def post_truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd), + (masked_st node:$val, node:$base, node:$offset, node:$postd), [{ + ISD::MemIndexedMode AM = cast(N)->getAddressingMode(); + return AM == ISD::POST_INC || AM == ISD::POST_DEC; +}]>; +def post_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd), + (post_truncmaskedst node:$val, node:$base, node:$offset, node:$postd), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def post_truncmaskedstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd), + (post_truncmaskedst node:$val, node:$base, node:$offset, node:$postd), [{ + auto *St = cast(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; +}]>; + let Predicates = [HasMVEInt] in { def : Pat<(truncstorevi8 (v8i16 MQPR:$val), taddrmode_imm7<0>:$addr), 
(MVE_VSTRB16 MQPR:$val, taddrmode_imm7<0>:$addr)>; @@ -5590,6 +5661,27 @@ let Predicates = [HasMVEInt] in { (MVE_VSTRB32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; def : Pat<(pre_truncstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr), (MVE_VSTRH32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>; + + def : Pat<(truncatingmaskedstore8 (v8i16 MQPR:$val), taddrmode_imm7<0>:$addr, VCCR:$pred), + (MVE_VSTRB16 MQPR:$val, taddrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; + def : Pat<(truncatingmaskedstore8 (v4i32 MQPR:$val), taddrmode_imm7<0>:$addr, VCCR:$pred), + (MVE_VSTRB32 MQPR:$val, taddrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; + def : Pat<(truncatingmaskedstore16 (v4i32 MQPR:$val), taddrmode_imm7<1>:$addr, VCCR:$pred), + (MVE_VSTRH32 MQPR:$val, taddrmode_imm7<1>:$addr, (i32 1), VCCR:$pred)>; + + def : Pat<(post_truncmaskedstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr, VCCR:$pred), + (MVE_VSTRB16_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr, (i32 1), VCCR:$pred)>; + def : Pat<(post_truncmaskedstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr, VCCR:$pred), + (MVE_VSTRB32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr, (i32 1), VCCR:$pred)>; + def : Pat<(post_truncmaskedstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr, VCCR:$pred), + (MVE_VSTRH32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr, (i32 1), VCCR:$pred)>; + + def : Pat<(pre_truncmaskedstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr, VCCR:$pred), + (MVE_VSTRB16_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr, (i32 1), VCCR:$pred)>; + def : Pat<(pre_truncmaskedstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr, VCCR:$pred), + (MVE_VSTRB32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr, (i32 1), VCCR:$pred)>; + def : Pat<(pre_truncmaskedstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr, VCCR:$pred), + (MVE_VSTRH32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr, (i32 1), 
VCCR:$pred)>; } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c3861adf09122..32072df268d3f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -24280,9 +24280,11 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements()); SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); + SDValue Offset = DAG.getUNDEF(VMask.getValueType()); - return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, VMask, MemVT, - MemIntr->getMemOperand(), true /* truncating */); + return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, Offset, VMask, + MemVT, MemIntr->getMemOperand(), ISD::UNINDEXED, + true /* truncating */); } case X86ISD::VTRUNCUS: case X86ISD::VTRUNCS: { @@ -27593,12 +27595,11 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget, if (PassThru.isUndef() || ISD::isBuildVectorAllZeros(PassThru.getNode())) return Op; - SDValue NewLoad = DAG.getMaskedLoad(VT, dl, N->getChain(), - N->getBasePtr(), Mask, - getZeroVector(VT, Subtarget, DAG, dl), - N->getMemoryVT(), N->getMemOperand(), - N->getExtensionType(), - N->isExpandingLoad()); + SDValue NewLoad = DAG.getMaskedLoad( + VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, + getZeroVector(VT, Subtarget, DAG, dl), N->getMemoryVT(), + N->getMemOperand(), N->getAddressingMode(), N->getExtensionType(), + N->isExpandingLoad()); // Emit a blend. 
SDValue Select = DAG.getNode(ISD::VSELECT, dl, MaskVT, Mask, NewLoad, PassThru); @@ -27632,11 +27633,10 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget, MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec); Mask = ExtendToType(Mask, WideMaskVT, DAG, true); - SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(), - N->getBasePtr(), Mask, PassThru, - N->getMemoryVT(), N->getMemOperand(), - N->getExtensionType(), - N->isExpandingLoad()); + SDValue NewLoad = DAG.getMaskedLoad( + WideDataVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, + PassThru, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(), + N->getExtensionType(), N->isExpandingLoad()); SDValue Exract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, NewLoad.getValue(0), @@ -27682,7 +27682,8 @@ static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget, DataToStore = ExtendToType(DataToStore, WideDataVT, DAG); Mask = ExtendToType(Mask, WideMaskVT, DAG, true); return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(), - Mask, N->getMemoryVT(), N->getMemOperand(), + N->getOffset(), Mask, N->getMemoryVT(), + N->getMemOperand(), N->getAddressingMode(), N->isTruncatingStore(), N->isCompressingStore()); } @@ -40453,6 +40454,7 @@ static bool getParamsForOneTrueMaskedElt(MaskedLoadStoreSDNode *MaskedOp, static SDValue reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { + assert(ML->isUnindexed() && "Unexpected indexed masked load!"); // TODO: This is not x86-specific, so it could be lifted to DAGCombiner. // However, some target hooks may need to be added to know when the transform // is profitable. Endianness would also have to be considered. 
@@ -40480,6 +40482,7 @@ reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG, static SDValue combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { + assert(ML->isUnindexed() && "Unexpected indexed masked load!"); if (!ISD::isBuildVectorOfConstantSDNodes(ML->getMask().getNode())) return SDValue(); @@ -40515,10 +40518,10 @@ combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG, // The new masked load has an undef pass-through operand. The select uses the // original pass-through operand. - SDValue NewML = DAG.getMaskedLoad(VT, DL, ML->getChain(), ML->getBasePtr(), - ML->getMask(), DAG.getUNDEF(VT), - ML->getMemoryVT(), ML->getMemOperand(), - ML->getExtensionType()); + SDValue NewML = DAG.getMaskedLoad( + VT, DL, ML->getChain(), ML->getBasePtr(), ML->getOffset(), ML->getMask(), + DAG.getUNDEF(VT), ML->getMemoryVT(), ML->getMemOperand(), + ML->getAddressingMode(), ML->getExtensionType()); SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML, ML->getPassThru()); @@ -40604,8 +40607,9 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG, TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), Mst->getMemoryVT())) { return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0), - Mst->getBasePtr(), Mask, - Mst->getMemoryVT(), Mst->getMemOperand(), true); + Mst->getBasePtr(), Mst->getOffset(), Mask, + Mst->getMemoryVT(), Mst->getMemOperand(), + Mst->getAddressingMode(), true); } return SDValue(); diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index de6f8a81dff65..1a4f7e1e6bbd6 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -706,6 +706,10 @@ def X86GF2P8affineinvqb : SDNode<"X86ISD::GF2P8AFFINEINVQB", SDTBlend>; def X86GF2P8affineqb : SDNode<"X86ISD::GF2P8AFFINEQB", SDTBlend>; def X86GF2P8mulb : SDNode<"X86ISD::GF2P8MULB", 
SDTIntBinOp>; +def SDTX86MaskedStore: SDTypeProfile<0, 3, [ // masked store + SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameNumEltsAs<0, 2> +]>; + //===----------------------------------------------------------------------===// // SSE Complex Patterns //===----------------------------------------------------------------------===// @@ -1040,9 +1044,10 @@ def vinsert256_insert : PatFrag<(ops node:$bigvec, node:$smallvec, INSERT_get_vinsert256_imm>; def masked_load : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_ld node:$src1, node:$src2, node:$src3), [{ + (masked_ld node:$src1, undef, node:$src2, node:$src3), [{ return !cast(N)->isExpandingLoad() && - cast(N)->getExtensionType() == ISD::NON_EXTLOAD; + cast(N)->getExtensionType() == ISD::NON_EXTLOAD && + cast(N)->isUnindexed(); }]>; def masked_load_aligned : PatFrag<(ops node:$src1, node:$src2, node:$src3), @@ -1055,17 +1060,19 @@ def masked_load_aligned : PatFrag<(ops node:$src1, node:$src2, node:$src3), }]>; def X86mExpandingLoad : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_ld node:$src1, node:$src2, node:$src3), [{ - return cast(N)->isExpandingLoad(); + (masked_ld node:$src1, undef, node:$src2, node:$src3), [{ + return cast(N)->isExpandingLoad() && + cast(N)->isUnindexed(); }]>; // Masked store fragments. // X86mstore can't be implemented in core DAG files because some targets // do not support vector types (llvm-tblgen will fail). 
def masked_store : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_st node:$src1, node:$src2, node:$src3), [{ - return (!cast(N)->isTruncatingStore()) && - (!cast(N)->isCompressingStore()); + (masked_st node:$src1, node:$src2, undef, node:$src3), [{ + return !cast(N)->isTruncatingStore() && + !cast(N)->isCompressingStore() && + cast(N)->isUnindexed(); }]>; def masked_store_aligned : PatFrag<(ops node:$src1, node:$src2, node:$src3), @@ -1078,16 +1085,18 @@ def masked_store_aligned : PatFrag<(ops node:$src1, node:$src2, node:$src3), }]>; def X86mCompressingStore : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_st node:$src1, node:$src2, node:$src3), [{ - return cast(N)->isCompressingStore(); + (masked_st node:$src1, node:$src2, undef, node:$src3), [{ + return cast(N)->isCompressingStore() && + cast(N)->isUnindexed(); }]>; // masked truncstore fragments // X86mtruncstore can't be implemented in core DAG files because some targets // doesn't support vector type ( llvm-tblgen will fail) def X86mtruncstore : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_st node:$src1, node:$src2, node:$src3), [{ - return cast(N)->isTruncatingStore(); + (masked_st node:$src1, node:$src2, undef, node:$src3), [{ + return cast(N)->isTruncatingStore() && + cast(N)->isUnindexed(); }]>; def masked_truncstorevi8 : PatFrag<(ops node:$src1, node:$src2, node:$src3), @@ -1111,10 +1120,10 @@ def X86TruncSStore : SDNode<"X86ISD::VTRUNCSTORES", SDTStore, def X86TruncUSStore : SDNode<"X86ISD::VTRUNCSTOREUS", SDTStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; -def X86MTruncSStore : SDNode<"X86ISD::VMTRUNCSTORES", SDTMaskedStore, +def X86MTruncSStore : SDNode<"X86ISD::VMTRUNCSTORES", SDTX86MaskedStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; -def X86MTruncUSStore : SDNode<"X86ISD::VMTRUNCSTOREUS", SDTMaskedStore, +def X86MTruncUSStore : SDNode<"X86ISD::VMTRUNCSTOREUS", SDTX86MaskedStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def truncstore_s_vi8 
: PatFrag<(ops node:$val, node:$ptr), diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll index 5900dd9ac66a9..0b50b9a1db4e9 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll @@ -154,11 +154,11 @@ for.cond.cleanup: ; preds = %middle.block, %entr ; CHECK-NEXT: vldrwt.u32 ; CHECK-NEXT: vldrwt.u32 ; CHECK: mov [[ELEMS_OUT:r[0-9]+]], [[ELEMS]] +; CHECK: sub{{.*}} [[ELEMS]],{{.*}}#4 ; CHECK: vpsttt ; CHECK-NEXT: vcmpt.i32 eq, {{.*}}, zr ; CHECK-NEXT: vldrwt.u32 q{{.*}}, [r3] ; CHECK-NEXT: vldrwt.u32 q{{.*}}, [r2] -; CHECK: sub{{.*}} [[ELEMS]],{{.*}}#4 ; CHECK: le lr, [[LOOP]] ; CHECK: vctp.32 [[ELEMS_OUT]] ; CHECK: vpsel diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll index f285b445cf3cf..f7c9236c6e62f 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll @@ -39,14 +39,11 @@ define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocaptur ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB0_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vldrw.u32 q1, [r2] -; CHECK-NEXT: vmul.f32 q0, q1, q0 -; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: adds r1, #16 -; CHECK-NEXT: adds r2, #16 -; CHECK-NEXT: adds r0, #16 +; CHECK-NEXT: vldrw.u32 q0, [r1], #16 +; CHECK-NEXT: vldrw.u32 q1, [r2], #16 ; CHECK-NEXT: subs r3, #4 +; CHECK-NEXT: vmul.f32 q0, q1, q0 +; CHECK-NEXT: vstrw.32 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB0_5 ; CHECK-NEXT: b .LBB0_11 ; CHECK-NEXT: .LBB0_6: @ %for.body.preheader.new @@ -236,13 +233,11 @@ define arm_aapcs_vfpcc float @fast_float_mac(float* nocapture readonly %b, float ; 
CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 -; CHECK-NEXT: vpstt -; CHECK-NEXT: vldrwt.u32 q2, [r0] -; CHECK-NEXT: vldrwt.u32 q3, [r1] ; CHECK-NEXT: mov r3, r2 -; CHECK-NEXT: adds r0, #16 -; CHECK-NEXT: adds r1, #16 ; CHECK-NEXT: subs r2, #4 +; CHECK-NEXT: vpstt +; CHECK-NEXT: vldrwt.u32 q2, [r0], #16 +; CHECK-NEXT: vldrwt.u32 q3, [r1], #16 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vfma.f32 q0, q3, q2 ; CHECK-NEXT: le lr, .LBB1_2 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll index 21be95e1fcc8a..23c447284293f 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll @@ -88,10 +88,9 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_short(i16 signext %a, i16* nocapture ; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB1_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrh.s32 q2, [r1] ; CHECK-NEXT: mov r3, r2 -; CHECK-NEXT: adds r1, #8 ; CHECK-NEXT: subs r2, #4 +; CHECK-NEXT: vldrh.s32 q2, [r1], #8 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vmla.u32 q0, q2, r0 ; CHECK-NEXT: letp lr, .LBB1_1 @@ -229,10 +228,9 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_ushort(i16 signext %a, i16* nocaptur ; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB3_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrh.u32 q2, [r1] ; CHECK-NEXT: mov r3, r2 -; CHECK-NEXT: adds r1, #8 ; CHECK-NEXT: subs r2, #4 +; CHECK-NEXT: vldrh.u32 q2, [r1], #8 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vmla.u32 q0, q2, r0 ; CHECK-NEXT: letp lr, .LBB3_1 @@ -295,10 +293,9 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_int(i32 %a, i32* nocapture readonly ; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB4_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q2, 
[r1] ; CHECK-NEXT: mov r3, r2 -; CHECK-NEXT: adds r1, #16 ; CHECK-NEXT: subs r2, #4 +; CHECK-NEXT: vldrw.u32 q2, [r1], #16 ; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vmla.u32 q0, q2, r0 ; CHECK-NEXT: letp lr, .LBB4_1 @@ -390,11 +387,10 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_char(i8* nocapture readonly ; CHECK-NEXT: adds r5, r1, r4 ; CHECK-NEXT: vldrb.u32 q1, [r5] ; CHECK-NEXT: vmul.i32 q0, q1, q0 -; CHECK-NEXT: vadd.i32 q0, q0, r2 -; CHECK-NEXT: vstrw.32 q0, [r3] -; CHECK-NEXT: adds r3, #16 ; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: sub.w r12, r12, #4 +; CHECK-NEXT: vadd.i32 q0, q0, r2 +; CHECK-NEXT: vstrw.32 q0, [r3], #16 ; CHECK-NEXT: letp lr, .LBB5_5 ; CHECK-NEXT: b .LBB5_12 ; CHECK-NEXT: .LBB5_6: @ %for.body.preheader.new @@ -594,15 +590,12 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_short(i16* nocapture readon ; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB6_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrh.s32 q0, [r0] -; CHECK-NEXT: vldrh.s32 q1, [r1] +; CHECK-NEXT: vldrh.s32 q0, [r0], #8 +; CHECK-NEXT: vldrh.s32 q1, [r1], #8 ; CHECK-NEXT: vmul.i32 q0, q1, q0 -; CHECK-NEXT: adds r0, #8 -; CHECK-NEXT: vadd.i32 q0, q0, r2 -; CHECK-NEXT: vstrw.32 q0, [r3] -; CHECK-NEXT: adds r1, #8 -; CHECK-NEXT: adds r3, #16 ; CHECK-NEXT: sub.w r12, r12, #4 +; CHECK-NEXT: vadd.i32 q0, q0, r2 +; CHECK-NEXT: vstrw.32 q0, [r3], #16 ; CHECK-NEXT: letp lr, .LBB6_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} @@ -691,11 +684,10 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_uchar(i8* nocapture readonl ; CHECK-NEXT: adds r5, r1, r4 ; CHECK-NEXT: vldrb.u32 q1, [r5] ; CHECK-NEXT: vmul.i32 q0, q1, q0 -; CHECK-NEXT: vadd.i32 q0, q0, r2 -; CHECK-NEXT: vstrw.32 q0, [r3] -; CHECK-NEXT: adds r3, #16 ; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: sub.w r12, r12, #4 +; CHECK-NEXT: vadd.i32 q0, q0, r2 +; CHECK-NEXT: vstrw.32 q0, [r3], #16 ; CHECK-NEXT: letp lr, .LBB7_5 ; CHECK-NEXT: b .LBB7_12 ; 
CHECK-NEXT: .LBB7_6: @ %for.body.preheader.new @@ -895,15 +887,12 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_ushort(i16* nocapture reado ; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB8_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrh.u32 q0, [r0] -; CHECK-NEXT: vldrh.u32 q1, [r1] +; CHECK-NEXT: vldrh.u32 q0, [r0], #8 +; CHECK-NEXT: vldrh.u32 q1, [r1], #8 ; CHECK-NEXT: vmul.i32 q0, q1, q0 -; CHECK-NEXT: adds r0, #8 -; CHECK-NEXT: vadd.i32 q0, q0, r2 -; CHECK-NEXT: vstrw.32 q0, [r3] -; CHECK-NEXT: adds r1, #8 -; CHECK-NEXT: adds r3, #16 ; CHECK-NEXT: sub.w r12, r12, #4 +; CHECK-NEXT: vadd.i32 q0, q0, r2 +; CHECK-NEXT: vstrw.32 q0, [r3], #16 ; CHECK-NEXT: letp lr, .LBB8_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} @@ -988,15 +977,12 @@ define arm_aapcs_vfpcc void @test_vec_mul_scalar_add_int(i32* nocapture readonly ; CHECK-NEXT: dlstp.32 lr, r12 ; CHECK-NEXT: .LBB9_5: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vldrw.u32 q1, [r1] +; CHECK-NEXT: vldrw.u32 q0, [r0], #16 +; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: vmul.i32 q0, q1, q0 -; CHECK-NEXT: adds r0, #16 -; CHECK-NEXT: vadd.i32 q0, q0, r2 -; CHECK-NEXT: vstrw.32 q0, [r3] -; CHECK-NEXT: adds r1, #16 -; CHECK-NEXT: adds r3, #16 ; CHECK-NEXT: sub.w r12, r12, #4 +; CHECK-NEXT: vadd.i32 q0, q0, r2 +; CHECK-NEXT: vstrw.32 q0, [r3], #16 ; CHECK-NEXT: letp lr, .LBB9_5 ; CHECK-NEXT: b .LBB9_11 ; CHECK-NEXT: .LBB9_6: @ %for.body.preheader.new @@ -1189,12 +1175,11 @@ define dso_local arm_aapcs_vfpcc void @test_v8i8_to_v8i16(i16* noalias nocapture ; CHECK-NEXT: add.w r4, r1, r12 ; CHECK-NEXT: vldrb.u16 q0, [r4] ; CHECK-NEXT: add.w r4, r2, r12 -; CHECK-NEXT: vldrb.u16 q1, [r4] -; CHECK-NEXT: vmul.i16 q0, q1, q0 -; CHECK-NEXT: vstrh.16 q0, [r0] -; CHECK-NEXT: adds r0, #16 ; CHECK-NEXT: add.w r12, r12, #8 ; CHECK-NEXT: subs r3, #8 +; CHECK-NEXT: vldrb.u16 q1, [r4] +; 
CHECK-NEXT: vmul.i16 q0, q1, q0 +; CHECK-NEXT: vstrh.16 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB10_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r4, pc} diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll index fdf04db82207f..04f408d78acb8 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll @@ -9,23 +9,21 @@ define dso_local i32 @mul_reduce_add(i32* noalias nocapture readonly %a, i32* no ; CHECK-NEXT: moveq r0, #0 ; CHECK-NEXT: bxeq lr ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: vmov.i32 q0, #0x0 +; CHECK-NEXT: vmov.i32 q1, #0x0 ; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB0_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vldrw.u32 q2, [r1] +; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: vldrw.u32 q1, [r0], #16 +; CHECK-NEXT: vldrw.u32 q2, [r1], #16 ; CHECK-NEXT: mov r3, r2 -; CHECK-NEXT: vmul.i32 q0, q2, q0 -; CHECK-NEXT: adds r0, #16 -; CHECK-NEXT: adds r1, #16 +; CHECK-NEXT: vmul.i32 q1, q2, q1 ; CHECK-NEXT: subs r2, #4 -; CHECK-NEXT: vadd.i32 q0, q0, q1 +; CHECK-NEXT: vadd.i32 q1, q1, q0 ; CHECK-NEXT: letp lr, .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %middle.block ; CHECK-NEXT: vctp.32 r3 -; CHECK-NEXT: vpsel q0, q0, q1 +; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vaddv.u32 r0, q0 ; CHECK-NEXT: pop {r7, pc} entry: @@ -83,8 +81,7 @@ define dso_local i32 @mul_reduce_add_const(i32* noalias nocapture readonly %a, i ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: mov r1, r2 ; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: adds r0, #16 +; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vadd.i32 q0, q0, q1 ; CHECK-NEXT: letp lr, .LBB1_1 @@ -144,8 +141,7 @@ define dso_local i32 @add_reduce_add_const(i32* 
noalias nocapture readonly %a, i ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: mov r1, r2 ; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: adds r0, #16 +; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vadd.i32 q0, q0, q1 ; CHECK-NEXT: letp lr, .LBB2_1 @@ -201,12 +197,10 @@ define dso_local void @vector_mul_const(i32* noalias nocapture %a, i32* noalias ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB3_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vmul.i32 q0, q0, r2 -; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: adds r1, #16 -; CHECK-NEXT: adds r0, #16 +; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: subs r3, #4 +; CHECK-NEXT: vmul.i32 q0, q0, r2 +; CHECK-NEXT: vstrw.32 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB3_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -255,12 +249,10 @@ define dso_local void @vector_add_const(i32* noalias nocapture %a, i32* noalias ; CHECK-NEXT: dlstp.32 lr, r3 ; CHECK-NEXT: .LBB4_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vadd.i32 q0, q0, r2 -; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: adds r1, #16 -; CHECK-NEXT: adds r0, #16 +; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: subs r3, #4 +; CHECK-NEXT: vadd.i32 q0, q0, r2 +; CHECK-NEXT: vstrw.32 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB4_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -369,14 +361,11 @@ define dso_local arm_aapcs_vfpcc void @vector_mul_vector_i16(i16* noalias nocapt ; CHECK-NEXT: dlstp.16 lr, r3 ; CHECK-NEXT: .LBB6_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vldrh.u16 q1, [r2] -; CHECK-NEXT: vmul.i16 q0, q1, q0 -; CHECK-NEXT: vstrh.16 q0, [r0] -; CHECK-NEXT: adds r1, #16 -; CHECK-NEXT: adds r2, #16 -; CHECK-NEXT: adds r0, 
#16 +; CHECK-NEXT: vldrh.u16 q0, [r1], #16 +; CHECK-NEXT: vldrh.u16 q1, [r2], #16 ; CHECK-NEXT: subs r3, #8 +; CHECK-NEXT: vmul.i16 q0, q1, q0 +; CHECK-NEXT: vstrh.16 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB6_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst-postinc.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-postinc.ll index 69286c8777c02..0951589eaa14c 100644 --- a/llvm/test/CodeGen/Thumb2/mve-masked-ldst-postinc.ll +++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-postinc.ll @@ -7,8 +7,7 @@ define i8* @ldrwu32_4(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrwt.u32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrwt.u32 q0, [r0], #4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -67,8 +66,7 @@ define i8* @ldrwu32_508(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrwt.u32 q0, [r0] -; CHECK-NEXT: add.w r0, r0, #508 +; CHECK-NEXT: vldrwt.u32 q0, [r0], #508 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -107,8 +105,7 @@ define i8* @ldrwu32_m508(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrwt.u32 q0, [r0] -; CHECK-NEXT: sub.w r0, r0, #508 +; CHECK-NEXT: vldrwt.u32 q0, [r0], #-508 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -147,8 +144,7 @@ define i8* @ldrhu32_4(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrht.u32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrht.u32 q0, [r0], #4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -189,8 +185,7 @@ define i8* @ldrhu32_2(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; 
CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrht.u32 q0, [r0] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrht.u32 q0, [r0], #2 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -210,8 +205,7 @@ define i8* @ldrhu32_254(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrht.u32 q0, [r0] -; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vldrht.u32 q0, [r0], #254 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -252,8 +246,7 @@ define i8* @ldrhu32_m254(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrht.u32 q0, [r0] -; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: vldrht.u32 q0, [r0], #-254 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -294,8 +287,7 @@ define i8* @ldrhs32_4(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrht.s32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrht.s32 q0, [r0], #4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -336,8 +328,7 @@ define i8* @ldrhs32_2(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrht.s32 q0, [r0] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrht.s32 q0, [r0], #2 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -357,8 +348,7 @@ define i8* @ldrhs32_254(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrht.s32 q0, [r0] -; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vldrht.s32 q0, [r0], #254 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -399,8 +389,7 @@ define i8* @ldrhs32_m254(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; 
CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrht.s32 q0, [r0] -; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: vldrht.s32 q0, [r0], #-254 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -441,8 +430,7 @@ define i8* @ldrhu16_4(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrht.u16 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrht.u16 q0, [r0], #4 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -481,8 +469,7 @@ define i8* @ldrhu16_2(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrht.u16 q0, [r0] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrht.u16 q0, [r0], #2 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -501,8 +488,7 @@ define i8* @ldrhu16_254(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrht.u16 q0, [r0] -; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vldrht.u16 q0, [r0], #254 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -541,8 +527,7 @@ define i8* @ldrhu16_m254(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrht.u16 q0, [r0] -; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: vldrht.u16 q0, [r0], #-254 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -581,8 +566,7 @@ define i8* @ldrbu32_4(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrbt.u32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrbt.u32 q0, [r0], #4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -602,8 +586,7 @@ define i8* @ldrbu32_3(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: 
vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrbt.u32 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrbt.u32 q0, [r0], #3 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -623,8 +606,7 @@ define i8* @ldrbu32_2(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrbt.u32 q0, [r0] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrbt.u32 q0, [r0], #2 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -644,8 +626,7 @@ define i8* @ldrbu32_127(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrbt.u32 q0, [r0] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vldrbt.u32 q0, [r0], #127 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -686,8 +667,7 @@ define i8* @ldrbu32_m127(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrbt.u32 q0, [r0] -; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vldrbt.u32 q0, [r0], #-127 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -728,8 +708,7 @@ define i8* @ldrbs32_4(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrbt.s32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrbt.s32 q0, [r0], #4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -749,8 +728,7 @@ define i8* @ldrbs32_3(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrbt.s32 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrbt.s32 q0, [r0], #3 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -770,8 +748,7 @@ define i8* @ldrbs32_2(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, 
[r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrbt.s32 q0, [r0] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrbt.s32 q0, [r0], #2 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -791,8 +768,7 @@ define i8* @ldrbs32_127(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrbt.s32 q0, [r0] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vldrbt.s32 q0, [r0], #127 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -833,8 +809,7 @@ define i8* @ldrbs32_m127(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrbt.s32 q0, [r0] -; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vldrbt.s32 q0, [r0], #-127 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -875,8 +850,7 @@ define i8* @ldrbu16_4(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrbt.u16 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrbt.u16 q0, [r0], #4 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -896,8 +870,7 @@ define i8* @ldrbu16_3(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrbt.u16 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrbt.u16 q0, [r0], #3 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -917,8 +890,7 @@ define i8* @ldrbu16_2(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrbt.u16 q0, [r0] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrbt.u16 q0, [r0], #2 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -938,8 +910,7 @@ define i8* @ldrbu16_127(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; 
CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrbt.u16 q0, [r0] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vldrbt.u16 q0, [r0], #127 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -980,8 +951,7 @@ define i8* @ldrbu16_m127(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrbt.u16 q0, [r0] -; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vldrbt.u16 q0, [r0], #-127 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1022,8 +992,7 @@ define i8* @ldrbs16_4(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrbt.s16 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrbt.s16 q0, [r0], #4 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1043,8 +1012,7 @@ define i8* @ldrbs16_3(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrbt.s16 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrbt.s16 q0, [r0], #3 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1064,8 +1032,7 @@ define i8* @ldrbs16_2(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrbt.s16 q0, [r0] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrbt.s16 q0, [r0], #2 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1085,8 +1052,7 @@ define i8* @ldrbs16_127(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrbt.s16 q0, [r0] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vldrbt.s16 q0, [r0], #127 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1127,8 +1093,7 @@ define i8* @ldrbs16_m127(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; 
CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrbt.s16 q0, [r0] -; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vldrbt.s16 q0, [r0], #-127 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1169,8 +1134,7 @@ define i8* @ldrbu8_4(i8* %x, i8* %y, <16 x i8> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u8 q0, [r2] ; CHECK-NEXT: vpt.i8 ne, q0, zr -; CHECK-NEXT: vldrbt.u8 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrbt.u8 q0, [r0], #4 ; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1189,8 +1153,7 @@ define i8* @ldrbu8_3(i8* %x, i8* %y, <16 x i8> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u8 q0, [r2] ; CHECK-NEXT: vpt.i8 ne, q0, zr -; CHECK-NEXT: vldrbt.u8 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrbt.u8 q0, [r0], #3 ; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1209,8 +1172,7 @@ define i8* @ldrbu8_2(i8* %x, i8* %y, <16 x i8> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u8 q0, [r2] ; CHECK-NEXT: vpt.i8 ne, q0, zr -; CHECK-NEXT: vldrbt.u8 q0, [r0] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrbt.u8 q0, [r0], #2 ; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1229,8 +1191,7 @@ define i8* @ldrbu8_127(i8* %x, i8* %y, <16 x i8> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u8 q0, [r2] ; CHECK-NEXT: vpt.i8 ne, q0, zr -; CHECK-NEXT: vldrbt.u8 q0, [r0] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vldrbt.u8 q0, [r0], #127 ; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1269,8 +1230,7 @@ define i8* @ldrbu8_m127(i8* %x, i8* %y, <16 x i8> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u8 q0, [r2] ; CHECK-NEXT: vpt.i8 ne, q0, zr -; CHECK-NEXT: vldrbt.u8 q0, [r0] -; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vldrbt.u8 q0, [r0], #-127 ; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1309,8 +1269,7 @@ define i8* @ldrwf32_4(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr 
-; CHECK-NEXT: vldrwt.u32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrwt.u32 q0, [r0], #4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1369,8 +1328,7 @@ define i8* @ldrwf32_508(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrwt.u32 q0, [r0] -; CHECK-NEXT: add.w r0, r0, #508 +; CHECK-NEXT: vldrwt.u32 q0, [r0], #508 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1409,8 +1367,7 @@ define i8* @ldrwf32_m508(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrwt.u32 q0, [r0] -; CHECK-NEXT: sub.w r0, r0, #508 +; CHECK-NEXT: vldrwt.u32 q0, [r0], #-508 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1449,8 +1406,7 @@ define i8* @ldrhf16_4(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrht.u16 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrht.u16 q0, [r0], #4 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1489,8 +1445,7 @@ define i8* @ldrhf16_2(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrht.u16 q0, [r0] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrht.u16 q0, [r0], #2 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1509,8 +1464,7 @@ define i8* @ldrhf16_254(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrht.u16 q0, [r0] -; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vldrht.u16 q0, [r0], #254 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1549,8 +1503,7 @@ define i8* @ldrhf16_m254(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: 
vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrht.u16 q0, [r0] -; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: vldrht.u16 q0, [r0], #-254 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1593,8 +1546,7 @@ define i8* @strw32_4(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrwt.32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrwt.32 q0, [r0], #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1653,8 +1605,7 @@ define i8* @strw32_508(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrwt.32 q0, [r0] -; CHECK-NEXT: add.w r0, r0, #508 +; CHECK-NEXT: vstrwt.32 q0, [r0], #508 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 508 @@ -1693,8 +1644,7 @@ define i8* @strw32_m508(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrwt.32 q0, [r0] -; CHECK-NEXT: sub.w r0, r0, #508 +; CHECK-NEXT: vstrwt.32 q0, [r0], #-508 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -508 @@ -1733,8 +1683,7 @@ define i8* @strh32_4(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrht.32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrht.32 q0, [r0], #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1773,8 +1722,7 @@ define i8* @strh32_2(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrht.32 q0, [r0] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrht.32 q0, [r0], #2 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 @@ -1793,8 +1741,7 @@ define i8* @strh32_254(i8* 
%y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrht.32 q0, [r0] -; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vstrht.32 q0, [r0], #254 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 254 @@ -1833,8 +1780,7 @@ define i8* @strh32_m254(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrht.32 q0, [r0] -; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: vstrht.32 q0, [r0], #-254 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -254 @@ -1873,8 +1819,7 @@ define i8* @strh16_4(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr -; CHECK-NEXT: vstrht.16 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrht.16 q0, [r0], #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1913,8 +1858,7 @@ define i8* @strh16_2(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr -; CHECK-NEXT: vstrht.16 q0, [r0] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrht.16 q0, [r0], #2 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 @@ -1933,8 +1877,7 @@ define i8* @strh16_254(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr -; CHECK-NEXT: vstrht.16 q0, [r0] -; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vstrht.16 q0, [r0], #254 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 254 @@ -1973,8 +1916,7 @@ define i8* @strh16_m254(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr -; CHECK-NEXT: vstrht.16 q0, [r0] -; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: vstrht.16 q0, [r0], #-254 ; 
CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -254 @@ -2013,8 +1955,7 @@ define i8* @strb32_4(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrbt.32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrbt.32 q0, [r0], #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -2033,8 +1974,7 @@ define i8* @strb32_3(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrbt.32 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrbt.32 q0, [r0], #3 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -2053,8 +1993,7 @@ define i8* @strb32_2(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrbt.32 q0, [r0] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrbt.32 q0, [r0], #2 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 @@ -2073,8 +2012,7 @@ define i8* @strb32_127(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrbt.32 q0, [r0] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vstrbt.32 q0, [r0], #127 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 127 @@ -2113,8 +2051,7 @@ define i8* @strb32_m127(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrbt.32 q0, [r0] -; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vstrbt.32 q0, [r0], #-127 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -127 @@ -2153,8 +2090,7 @@ define i8* @strb16_4(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, 
zr -; CHECK-NEXT: vstrbt.16 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrbt.16 q0, [r0], #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -2173,8 +2109,7 @@ define i8* @strb16_3(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr -; CHECK-NEXT: vstrbt.16 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrbt.16 q0, [r0], #3 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -2193,8 +2128,7 @@ define i8* @strb16_2(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr -; CHECK-NEXT: vstrbt.16 q0, [r0] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrbt.16 q0, [r0], #2 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 @@ -2213,8 +2147,7 @@ define i8* @strb16_127(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr -; CHECK-NEXT: vstrbt.16 q0, [r0] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vstrbt.16 q0, [r0], #127 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 127 @@ -2253,8 +2186,7 @@ define i8* @strb16_m127(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr -; CHECK-NEXT: vstrbt.16 q0, [r0] -; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vstrbt.16 q0, [r0], #-127 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -127 @@ -2293,8 +2225,7 @@ define i8* @strb8_4(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] ; CHECK-NEXT: vpt.i8 ne, q1, zr -; CHECK-NEXT: vstrbt.8 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrbt.8 q0, [r0], #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -2313,8 +2244,7 @@ define i8* @strb8_3(i8* %y, i8* %x, <16 x i8> *%m) { ; 
CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] ; CHECK-NEXT: vpt.i8 ne, q1, zr -; CHECK-NEXT: vstrbt.8 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrbt.8 q0, [r0], #3 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -2333,8 +2263,7 @@ define i8* @strb8_2(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] ; CHECK-NEXT: vpt.i8 ne, q1, zr -; CHECK-NEXT: vstrbt.8 q0, [r0] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrbt.8 q0, [r0], #2 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 @@ -2353,8 +2282,7 @@ define i8* @strb8_127(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] ; CHECK-NEXT: vpt.i8 ne, q1, zr -; CHECK-NEXT: vstrbt.8 q0, [r0] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vstrbt.8 q0, [r0], #127 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 127 @@ -2393,8 +2321,7 @@ define i8* @strb8_m127(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] ; CHECK-NEXT: vpt.i8 ne, q1, zr -; CHECK-NEXT: vstrbt.8 q0, [r0] -; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vstrbt.8 q0, [r0], #-127 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -127 @@ -2433,8 +2360,7 @@ define i8* @strwf32_4(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrwt.32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrwt.32 q0, [r0], #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -2493,8 +2419,7 @@ define i8* @strwf32_508(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrwt.32 q0, [r0] -; CHECK-NEXT: add.w r0, r0, #508 +; CHECK-NEXT: vstrwt.32 q0, [r0], #508 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, 
i8* %y, i32 508 @@ -2533,8 +2458,7 @@ define i8* @strwf32_m508(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrwt.32 q0, [r0] -; CHECK-NEXT: sub.w r0, r0, #508 +; CHECK-NEXT: vstrwt.32 q0, [r0], #-508 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -508 @@ -2573,8 +2497,7 @@ define i8* @strhf16_4(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr -; CHECK-NEXT: vstrht.16 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrht.16 q0, [r0], #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -2613,8 +2536,7 @@ define i8* @strhf16_2(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr -; CHECK-NEXT: vstrht.16 q0, [r0] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrht.16 q0, [r0], #2 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 @@ -2633,8 +2555,7 @@ define i8* @strhf16_254(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr -; CHECK-NEXT: vstrht.16 q0, [r0] -; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vstrht.16 q0, [r0], #254 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 254 @@ -2673,8 +2594,7 @@ define i8* @strhf16_m254(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr -; CHECK-NEXT: vstrht.16 q0, [r0] -; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: vstrht.16 q0, [r0], #-254 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -254 diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst-preinc.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-preinc.ll index 287446963ce66..beb5aae634116 100644 --- 
a/llvm/test/CodeGen/Thumb2/mve-masked-ldst-preinc.ll +++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-preinc.ll @@ -7,8 +7,7 @@ define i8* @ldrwu32_4(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrwt.u32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrwt.u32 q0, [r0, #4]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -67,8 +66,7 @@ define i8* @ldrwu32_508(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrwt.u32 q0, [r0, #508] -; CHECK-NEXT: add.w r0, r0, #508 +; CHECK-NEXT: vldrwt.u32 q0, [r0, #508]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -107,8 +105,7 @@ define i8* @ldrwu32_m508(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrwt.u32 q0, [r0, #-508] -; CHECK-NEXT: sub.w r0, r0, #508 +; CHECK-NEXT: vldrwt.u32 q0, [r0, #-508]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -147,8 +144,7 @@ define i8* @ldrhu32_4(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrht.u32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrht.u32 q0, [r0, #4]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -189,8 +185,7 @@ define i8* @ldrhu32_2(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrht.u32 q0, [r0, #2] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrht.u32 q0, [r0, #2]! 
; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -210,8 +205,7 @@ define i8* @ldrhu32_254(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrht.u32 q0, [r0, #254] -; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vldrht.u32 q0, [r0, #254]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -252,8 +246,7 @@ define i8* @ldrhu32_m254(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrht.u32 q0, [r0, #-254] -; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: vldrht.u32 q0, [r0, #-254]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -294,8 +287,7 @@ define i8* @ldrhs32_4(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrht.s32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrht.s32 q0, [r0, #4]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -336,8 +328,7 @@ define i8* @ldrhs32_2(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrht.s32 q0, [r0, #2] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrht.s32 q0, [r0, #2]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -357,8 +348,7 @@ define i8* @ldrhs32_254(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrht.s32 q0, [r0, #254] -; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vldrht.s32 q0, [r0, #254]! 
; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -399,8 +389,7 @@ define i8* @ldrhs32_m254(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrht.s32 q0, [r0, #-254] -; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: vldrht.s32 q0, [r0, #-254]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -441,8 +430,7 @@ define i8* @ldrhu16_4(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrht.u16 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrht.u16 q0, [r0, #4]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -481,8 +469,7 @@ define i8* @ldrhu16_2(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrht.u16 q0, [r0, #2] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrht.u16 q0, [r0, #2]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -501,8 +488,7 @@ define i8* @ldrhu16_254(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrht.u16 q0, [r0, #254] -; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vldrht.u16 q0, [r0, #254]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -541,8 +527,7 @@ define i8* @ldrhu16_m254(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrht.u16 q0, [r0, #-254] -; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: vldrht.u16 q0, [r0, #-254]! 
; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -581,8 +566,7 @@ define i8* @ldrbu32_4(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrbt.u32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrbt.u32 q0, [r0, #4]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -602,8 +586,7 @@ define i8* @ldrbu32_3(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrbt.u32 q0, [r0, #3] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrbt.u32 q0, [r0, #3]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -623,8 +606,7 @@ define i8* @ldrbu32_2(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrbt.u32 q0, [r0, #2] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrbt.u32 q0, [r0, #2]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -644,8 +626,7 @@ define i8* @ldrbu32_127(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrbt.u32 q0, [r0, #127] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vldrbt.u32 q0, [r0, #127]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -686,8 +667,7 @@ define i8* @ldrbu32_m127(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrbt.u32 q0, [r0, #-127] -; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vldrbt.u32 q0, [r0, #-127]! 
; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -728,8 +708,7 @@ define i8* @ldrbs32_4(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrbt.s32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrbt.s32 q0, [r0, #4]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -749,8 +728,7 @@ define i8* @ldrbs32_3(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrbt.s32 q0, [r0, #3] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrbt.s32 q0, [r0, #3]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -770,8 +748,7 @@ define i8* @ldrbs32_2(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrbt.s32 q0, [r0, #2] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrbt.s32 q0, [r0, #2]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -791,8 +768,7 @@ define i8* @ldrbs32_127(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrbt.s32 q0, [r0, #127] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vldrbt.s32 q0, [r0, #127]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -833,8 +809,7 @@ define i8* @ldrbs32_m127(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrbt.s32 q0, [r0, #-127] -; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vldrbt.s32 q0, [r0, #-127]! 
; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -875,8 +850,7 @@ define i8* @ldrbu16_4(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrbt.u16 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrbt.u16 q0, [r0, #4]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -896,8 +870,7 @@ define i8* @ldrbu16_3(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrbt.u16 q0, [r0, #3] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrbt.u16 q0, [r0, #3]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -917,8 +890,7 @@ define i8* @ldrbu16_2(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrbt.u16 q0, [r0, #2] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrbt.u16 q0, [r0, #2]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -938,8 +910,7 @@ define i8* @ldrbu16_127(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrbt.u16 q0, [r0, #127] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vldrbt.u16 q0, [r0, #127]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -980,8 +951,7 @@ define i8* @ldrbu16_m127(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrbt.u16 q0, [r0, #-127] -; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vldrbt.u16 q0, [r0, #-127]! 
; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1022,8 +992,7 @@ define i8* @ldrbs16_4(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrbt.s16 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrbt.s16 q0, [r0, #4]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1043,8 +1012,7 @@ define i8* @ldrbs16_3(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrbt.s16 q0, [r0, #3] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrbt.s16 q0, [r0, #3]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1064,8 +1032,7 @@ define i8* @ldrbs16_2(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrbt.s16 q0, [r0, #2] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrbt.s16 q0, [r0, #2]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1085,8 +1052,7 @@ define i8* @ldrbs16_127(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrbt.s16 q0, [r0, #127] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vldrbt.s16 q0, [r0, #127]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1127,8 +1093,7 @@ define i8* @ldrbs16_m127(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrbt.s16 q0, [r0, #-127] -; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vldrbt.s16 q0, [r0, #-127]! 
; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1169,8 +1134,7 @@ define i8* @ldrbu8_4(i8* %x, i8* %y, <16 x i8> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u8 q0, [r2] ; CHECK-NEXT: vpt.i8 ne, q0, zr -; CHECK-NEXT: vldrbt.u8 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrbt.u8 q0, [r0, #4]! ; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1189,8 +1153,7 @@ define i8* @ldrbu8_3(i8* %x, i8* %y, <16 x i8> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u8 q0, [r2] ; CHECK-NEXT: vpt.i8 ne, q0, zr -; CHECK-NEXT: vldrbt.u8 q0, [r0, #3] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrbt.u8 q0, [r0, #3]! ; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1209,8 +1172,7 @@ define i8* @ldrbu8_2(i8* %x, i8* %y, <16 x i8> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u8 q0, [r2] ; CHECK-NEXT: vpt.i8 ne, q0, zr -; CHECK-NEXT: vldrbt.u8 q0, [r0, #2] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrbt.u8 q0, [r0, #2]! ; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1229,8 +1191,7 @@ define i8* @ldrbu8_127(i8* %x, i8* %y, <16 x i8> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u8 q0, [r2] ; CHECK-NEXT: vpt.i8 ne, q0, zr -; CHECK-NEXT: vldrbt.u8 q0, [r0, #127] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vldrbt.u8 q0, [r0, #127]! ; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1269,8 +1230,7 @@ define i8* @ldrbu8_m127(i8* %x, i8* %y, <16 x i8> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u8 q0, [r2] ; CHECK-NEXT: vpt.i8 ne, q0, zr -; CHECK-NEXT: vldrbt.u8 q0, [r0, #-127] -; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vldrbt.u8 q0, [r0, #-127]! ; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1309,8 +1269,7 @@ define i8* @ldrwf32_4(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrwt.u32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrwt.u32 q0, [r0, #4]! 
; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1369,8 +1328,7 @@ define i8* @ldrwf32_508(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrwt.u32 q0, [r0, #508] -; CHECK-NEXT: add.w r0, r0, #508 +; CHECK-NEXT: vldrwt.u32 q0, [r0, #508]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1409,8 +1367,7 @@ define i8* @ldrwf32_m508(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vpt.i32 ne, q0, zr -; CHECK-NEXT: vldrwt.u32 q0, [r0, #-508] -; CHECK-NEXT: sub.w r0, r0, #508 +; CHECK-NEXT: vldrwt.u32 q0, [r0, #-508]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1449,8 +1406,7 @@ define i8* @ldrhf16_4(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrht.u16 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrht.u16 q0, [r0, #4]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1489,8 +1445,7 @@ define i8* @ldrhf16_2(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrht.u16 q0, [r0, #2] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrht.u16 q0, [r0, #2]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1509,8 +1464,7 @@ define i8* @ldrhf16_254(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrht.u16 q0, [r0, #254] -; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vldrht.u16 q0, [r0, #254]! 
; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1549,8 +1503,7 @@ define i8* @ldrhf16_m254(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vpt.i16 ne, q0, zr -; CHECK-NEXT: vldrht.u16 q0, [r0, #-254] -; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: vldrht.u16 q0, [r0, #-254]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -1593,8 +1546,7 @@ define i8* @strw32_4(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrwt.32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrwt.32 q0, [r0, #4]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1653,8 +1605,7 @@ define i8* @strw32_508(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrwt.32 q0, [r0, #508] -; CHECK-NEXT: add.w r0, r0, #508 +; CHECK-NEXT: vstrwt.32 q0, [r0, #508]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 508 @@ -1693,8 +1644,7 @@ define i8* @strw32_m508(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrwt.32 q0, [r0, #-508] -; CHECK-NEXT: sub.w r0, r0, #508 +; CHECK-NEXT: vstrwt.32 q0, [r0, #-508]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -508 @@ -1733,8 +1683,7 @@ define i8* @strh32_4(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrht.32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrht.32 q0, [r0, #4]! 
; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1773,8 +1722,7 @@ define i8* @strh32_2(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrht.32 q0, [r0, #2] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrht.32 q0, [r0, #2]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 @@ -1793,8 +1741,7 @@ define i8* @strh32_254(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrht.32 q0, [r0, #254] -; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vstrht.32 q0, [r0, #254]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 254 @@ -1833,8 +1780,7 @@ define i8* @strh32_m254(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrht.32 q0, [r0, #-254] -; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: vstrht.32 q0, [r0, #-254]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -254 @@ -1873,8 +1819,7 @@ define i8* @strh16_4(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr -; CHECK-NEXT: vstrht.16 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrht.16 q0, [r0, #4]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1913,8 +1858,7 @@ define i8* @strh16_2(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr -; CHECK-NEXT: vstrht.16 q0, [r0, #2] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrht.16 q0, [r0, #2]! 
; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 @@ -1933,8 +1877,7 @@ define i8* @strh16_254(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr -; CHECK-NEXT: vstrht.16 q0, [r0, #254] -; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vstrht.16 q0, [r0, #254]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 254 @@ -1973,8 +1916,7 @@ define i8* @strh16_m254(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr -; CHECK-NEXT: vstrht.16 q0, [r0, #-254] -; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: vstrht.16 q0, [r0, #-254]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -254 @@ -2013,8 +1955,7 @@ define i8* @strb32_4(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrbt.32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrbt.32 q0, [r0, #4]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -2033,8 +1974,7 @@ define i8* @strb32_3(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrbt.32 q0, [r0, #3] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrbt.32 q0, [r0, #3]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -2053,8 +1993,7 @@ define i8* @strb32_2(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrbt.32 q0, [r0, #2] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrbt.32 q0, [r0, #2]! 
; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 @@ -2073,8 +2012,7 @@ define i8* @strb32_127(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrbt.32 q0, [r0, #127] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vstrbt.32 q0, [r0, #127]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 127 @@ -2113,8 +2051,7 @@ define i8* @strb32_m127(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrbt.32 q0, [r0, #-127] -; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vstrbt.32 q0, [r0, #-127]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -127 @@ -2153,8 +2090,7 @@ define i8* @strb16_4(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr -; CHECK-NEXT: vstrbt.16 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrbt.16 q0, [r0, #4]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -2173,8 +2109,7 @@ define i8* @strb16_3(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr -; CHECK-NEXT: vstrbt.16 q0, [r0, #3] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrbt.16 q0, [r0, #3]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -2193,8 +2128,7 @@ define i8* @strb16_2(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr -; CHECK-NEXT: vstrbt.16 q0, [r0, #2] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrbt.16 q0, [r0, #2]! 
; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 @@ -2213,8 +2147,7 @@ define i8* @strb16_127(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr -; CHECK-NEXT: vstrbt.16 q0, [r0, #127] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vstrbt.16 q0, [r0, #127]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 127 @@ -2253,8 +2186,7 @@ define i8* @strb16_m127(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr -; CHECK-NEXT: vstrbt.16 q0, [r0, #-127] -; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vstrbt.16 q0, [r0, #-127]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -127 @@ -2293,8 +2225,7 @@ define i8* @strb8_4(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] ; CHECK-NEXT: vpt.i8 ne, q1, zr -; CHECK-NEXT: vstrbt.8 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrbt.8 q0, [r0, #4]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -2313,8 +2244,7 @@ define i8* @strb8_3(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] ; CHECK-NEXT: vpt.i8 ne, q1, zr -; CHECK-NEXT: vstrbt.8 q0, [r0, #3] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrbt.8 q0, [r0, #3]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -2333,8 +2263,7 @@ define i8* @strb8_2(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] ; CHECK-NEXT: vpt.i8 ne, q1, zr -; CHECK-NEXT: vstrbt.8 q0, [r0, #2] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrbt.8 q0, [r0, #2]! 
; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 @@ -2353,8 +2282,7 @@ define i8* @strb8_127(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] ; CHECK-NEXT: vpt.i8 ne, q1, zr -; CHECK-NEXT: vstrbt.8 q0, [r0, #127] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vstrbt.8 q0, [r0, #127]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 127 @@ -2393,8 +2321,7 @@ define i8* @strb8_m127(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] ; CHECK-NEXT: vpt.i8 ne, q1, zr -; CHECK-NEXT: vstrbt.8 q0, [r0, #-127] -; CHECK-NEXT: subs r0, #127 +; CHECK-NEXT: vstrbt.8 q0, [r0, #-127]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -127 @@ -2433,8 +2360,7 @@ define i8* @strwf32_4(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrwt.32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrwt.32 q0, [r0, #4]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -2493,8 +2419,7 @@ define i8* @strwf32_508(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrwt.32 q0, [r0, #508] -; CHECK-NEXT: add.w r0, r0, #508 +; CHECK-NEXT: vstrwt.32 q0, [r0, #508]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 508 @@ -2533,8 +2458,7 @@ define i8* @strwf32_m508(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr -; CHECK-NEXT: vstrwt.32 q0, [r0, #-508] -; CHECK-NEXT: sub.w r0, r0, #508 +; CHECK-NEXT: vstrwt.32 q0, [r0, #-508]! 
; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -508 @@ -2573,8 +2497,7 @@ define i8* @strhf16_4(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr -; CHECK-NEXT: vstrht.16 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrht.16 q0, [r0, #4]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -2613,8 +2536,7 @@ define i8* @strhf16_2(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr -; CHECK-NEXT: vstrht.16 q0, [r0, #2] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrht.16 q0, [r0, #2]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 @@ -2633,8 +2555,7 @@ define i8* @strhf16_254(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr -; CHECK-NEXT: vstrht.16 q0, [r0, #254] -; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vstrht.16 q0, [r0, #254]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 254 @@ -2673,8 +2594,7 @@ define i8* @strhf16_m254(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr -; CHECK-NEXT: vstrht.16 q0, [r0, #-254] -; CHECK-NEXT: subs r0, #254 +; CHECK-NEXT: vstrht.16 q0, [r0, #-254]! 
; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -254 diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll index e75e07604e879..54a94b8981c2e 100644 --- a/llvm/test/CodeGen/Thumb2/mve-masked-load.ll +++ b/llvm/test/CodeGen/Thumb2/mve-masked-load.ll @@ -468,8 +468,7 @@ define arm_aapcs_vfpcc i8* @masked_v4i32_preinc(i8* %x, i8* %y, <4 x i32> %a) { ; CHECK-LE-LABEL: masked_v4i32_preinc: ; CHECK-LE: @ %bb.0: @ %entry ; CHECK-LE-NEXT: vpt.s32 gt, q0, zr -; CHECK-LE-NEXT: vldrwt.u32 q0, [r0, #4] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vldrwt.u32 q0, [r0, #4]! ; CHECK-LE-NEXT: vstrw.32 q0, [r1] ; CHECK-LE-NEXT: bx lr ; @@ -477,8 +476,7 @@ define arm_aapcs_vfpcc i8* @masked_v4i32_preinc(i8* %x, i8* %y, <4 x i32> %a) { ; CHECK-BE: @ %bb.0: @ %entry ; CHECK-BE-NEXT: vrev64.32 q1, q0 ; CHECK-BE-NEXT: vpt.s32 gt, q1, zr -; CHECK-BE-NEXT: vldrwt.u32 q0, [r0, #4] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vldrwt.u32 q0, [r0, #4]! 
; CHECK-BE-NEXT: vstrw.32 q0, [r1] ; CHECK-BE-NEXT: bx lr entry: @@ -495,8 +493,7 @@ define arm_aapcs_vfpcc i8* @masked_v4i32_postinc(i8* %x, i8* %y, <4 x i32> %a) { ; CHECK-LE-LABEL: masked_v4i32_postinc: ; CHECK-LE: @ %bb.0: @ %entry ; CHECK-LE-NEXT: vpt.s32 gt, q0, zr -; CHECK-LE-NEXT: vldrwt.u32 q0, [r0] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vldrwt.u32 q0, [r0], #4 ; CHECK-LE-NEXT: vstrw.32 q0, [r1] ; CHECK-LE-NEXT: bx lr ; @@ -504,8 +501,7 @@ define arm_aapcs_vfpcc i8* @masked_v4i32_postinc(i8* %x, i8* %y, <4 x i32> %a) { ; CHECK-BE: @ %bb.0: @ %entry ; CHECK-BE-NEXT: vrev64.32 q1, q0 ; CHECK-BE-NEXT: vpt.s32 gt, q1, zr -; CHECK-BE-NEXT: vldrwt.u32 q0, [r0] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vldrwt.u32 q0, [r0], #4 ; CHECK-BE-NEXT: vstrw.32 q0, [r1] ; CHECK-BE-NEXT: bx lr entry: @@ -1032,8 +1028,7 @@ define i8* @masked_v8i16_preinc(i8* %x, i8* %y, <8 x i16> %a) { ; CHECK-LE-NEXT: vldr d1, [sp] ; CHECK-LE-NEXT: vmov d0, r2, r3 ; CHECK-LE-NEXT: vpt.s16 gt, q0, zr -; CHECK-LE-NEXT: vldrht.u16 q0, [r0, #4] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vldrht.u16 q0, [r0, #4]! ; CHECK-LE-NEXT: vstrw.32 q0, [r1] ; CHECK-LE-NEXT: bx lr ; @@ -1043,8 +1038,7 @@ define i8* @masked_v8i16_preinc(i8* %x, i8* %y, <8 x i16> %a) { ; CHECK-BE-NEXT: vmov d0, r3, r2 ; CHECK-BE-NEXT: vrev64.16 q1, q0 ; CHECK-BE-NEXT: vpt.s16 gt, q1, zr -; CHECK-BE-NEXT: vldrht.u16 q0, [r0, #4] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vldrht.u16 q0, [r0, #4]! 
; CHECK-BE-NEXT: vstrh.16 q0, [r1] ; CHECK-BE-NEXT: bx lr entry: @@ -1061,8 +1055,7 @@ define arm_aapcs_vfpcc i8* @masked_v8i16_postinc(i8* %x, i8* %y, <8 x i16> %a) { ; CHECK-LE-LABEL: masked_v8i16_postinc: ; CHECK-LE: @ %bb.0: @ %entry ; CHECK-LE-NEXT: vpt.s16 gt, q0, zr -; CHECK-LE-NEXT: vldrht.u16 q0, [r0] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vldrht.u16 q0, [r0], #4 ; CHECK-LE-NEXT: vstrw.32 q0, [r1] ; CHECK-LE-NEXT: bx lr ; @@ -1070,8 +1063,7 @@ define arm_aapcs_vfpcc i8* @masked_v8i16_postinc(i8* %x, i8* %y, <8 x i16> %a) { ; CHECK-BE: @ %bb.0: @ %entry ; CHECK-BE-NEXT: vrev64.16 q1, q0 ; CHECK-BE-NEXT: vpt.s16 gt, q1, zr -; CHECK-BE-NEXT: vldrht.u16 q0, [r0] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vldrht.u16 q0, [r0], #4 ; CHECK-BE-NEXT: vstrh.16 q0, [r1] ; CHECK-BE-NEXT: bx lr entry: @@ -1151,8 +1143,7 @@ define arm_aapcs_vfpcc i8* @masked_v16i8_preinc(i8* %x, i8* %y, <16 x i8> %a) { ; CHECK-LE-LABEL: masked_v16i8_preinc: ; CHECK-LE: @ %bb.0: @ %entry ; CHECK-LE-NEXT: vpt.s8 gt, q0, zr -; CHECK-LE-NEXT: vldrbt.u8 q0, [r0, #4] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vldrbt.u8 q0, [r0, #4]! ; CHECK-LE-NEXT: vstrw.32 q0, [r1] ; CHECK-LE-NEXT: bx lr ; @@ -1160,8 +1151,7 @@ define arm_aapcs_vfpcc i8* @masked_v16i8_preinc(i8* %x, i8* %y, <16 x i8> %a) { ; CHECK-BE: @ %bb.0: @ %entry ; CHECK-BE-NEXT: vrev64.8 q1, q0 ; CHECK-BE-NEXT: vpt.s8 gt, q1, zr -; CHECK-BE-NEXT: vldrbt.u8 q0, [r0, #4] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vldrbt.u8 q0, [r0, #4]! 
; CHECK-BE-NEXT: vstrb.8 q0, [r1] ; CHECK-BE-NEXT: bx lr entry: @@ -1178,8 +1168,7 @@ define arm_aapcs_vfpcc i8* @masked_v16i8_postinc(i8* %x, i8* %y, <16 x i8> %a) { ; CHECK-LE-LABEL: masked_v16i8_postinc: ; CHECK-LE: @ %bb.0: @ %entry ; CHECK-LE-NEXT: vpt.s8 gt, q0, zr -; CHECK-LE-NEXT: vldrbt.u8 q0, [r0] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vldrbt.u8 q0, [r0], #4 ; CHECK-LE-NEXT: vstrw.32 q0, [r1] ; CHECK-LE-NEXT: bx lr ; @@ -1187,8 +1176,7 @@ define arm_aapcs_vfpcc i8* @masked_v16i8_postinc(i8* %x, i8* %y, <16 x i8> %a) { ; CHECK-BE: @ %bb.0: @ %entry ; CHECK-BE-NEXT: vrev64.8 q1, q0 ; CHECK-BE-NEXT: vpt.s8 gt, q1, zr -; CHECK-BE-NEXT: vldrbt.u8 q0, [r0] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vldrbt.u8 q0, [r0], #4 ; CHECK-BE-NEXT: vstrb.8 q0, [r1] ; CHECK-BE-NEXT: bx lr entry: @@ -1355,8 +1343,7 @@ define arm_aapcs_vfpcc i8* @masked_v4f32_preinc(i8* %x, i8* %y, <4 x i32> %a) { ; CHECK-LE-LABEL: masked_v4f32_preinc: ; CHECK-LE: @ %bb.0: @ %entry ; CHECK-LE-NEXT: vpt.s32 gt, q0, zr -; CHECK-LE-NEXT: vldrwt.u32 q0, [r0, #4] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vldrwt.u32 q0, [r0, #4]! ; CHECK-LE-NEXT: vstrw.32 q0, [r1] ; CHECK-LE-NEXT: bx lr ; @@ -1364,8 +1351,7 @@ define arm_aapcs_vfpcc i8* @masked_v4f32_preinc(i8* %x, i8* %y, <4 x i32> %a) { ; CHECK-BE: @ %bb.0: @ %entry ; CHECK-BE-NEXT: vrev64.32 q1, q0 ; CHECK-BE-NEXT: vpt.s32 gt, q1, zr -; CHECK-BE-NEXT: vldrwt.u32 q0, [r0, #4] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vldrwt.u32 q0, [r0, #4]! 
; CHECK-BE-NEXT: vstrw.32 q0, [r1] ; CHECK-BE-NEXT: bx lr entry: @@ -1382,8 +1368,7 @@ define arm_aapcs_vfpcc i8* @masked_v4f32_postinc(i8* %x, i8* %y, <4 x i32> %a) { ; CHECK-LE-LABEL: masked_v4f32_postinc: ; CHECK-LE: @ %bb.0: @ %entry ; CHECK-LE-NEXT: vpt.s32 gt, q0, zr -; CHECK-LE-NEXT: vldrwt.u32 q0, [r0] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vldrwt.u32 q0, [r0], #4 ; CHECK-LE-NEXT: vstrw.32 q0, [r1] ; CHECK-LE-NEXT: bx lr ; @@ -1391,8 +1376,7 @@ define arm_aapcs_vfpcc i8* @masked_v4f32_postinc(i8* %x, i8* %y, <4 x i32> %a) { ; CHECK-BE: @ %bb.0: @ %entry ; CHECK-BE-NEXT: vrev64.32 q1, q0 ; CHECK-BE-NEXT: vpt.s32 gt, q1, zr -; CHECK-BE-NEXT: vldrwt.u32 q0, [r0] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vldrwt.u32 q0, [r0], #4 ; CHECK-BE-NEXT: vstrw.32 q0, [r1] ; CHECK-BE-NEXT: bx lr entry: @@ -1724,8 +1708,7 @@ define arm_aapcs_vfpcc i8* @masked_v8f16_preinc(i8* %x, i8* %y, <8 x i16> %a) { ; CHECK-LE-LABEL: masked_v8f16_preinc: ; CHECK-LE: @ %bb.0: @ %entry ; CHECK-LE-NEXT: vpt.s16 gt, q0, zr -; CHECK-LE-NEXT: vldrht.u16 q0, [r0, #4] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vldrht.u16 q0, [r0, #4]! ; CHECK-LE-NEXT: vstrw.32 q0, [r1] ; CHECK-LE-NEXT: bx lr ; @@ -1733,8 +1716,7 @@ define arm_aapcs_vfpcc i8* @masked_v8f16_preinc(i8* %x, i8* %y, <8 x i16> %a) { ; CHECK-BE: @ %bb.0: @ %entry ; CHECK-BE-NEXT: vrev64.16 q1, q0 ; CHECK-BE-NEXT: vpt.s16 gt, q1, zr -; CHECK-BE-NEXT: vldrht.u16 q0, [r0, #4] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vldrht.u16 q0, [r0, #4]! 
; CHECK-BE-NEXT: vstrh.16 q0, [r1] ; CHECK-BE-NEXT: bx lr entry: @@ -1751,8 +1733,7 @@ define arm_aapcs_vfpcc i8* @masked_v8f16_postinc(i8* %x, i8* %y, <8 x i16> %a) { ; CHECK-LE-LABEL: masked_v8f16_postinc: ; CHECK-LE: @ %bb.0: @ %entry ; CHECK-LE-NEXT: vpt.s16 gt, q0, zr -; CHECK-LE-NEXT: vldrht.u16 q0, [r0] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vldrht.u16 q0, [r0], #4 ; CHECK-LE-NEXT: vstrw.32 q0, [r1] ; CHECK-LE-NEXT: bx lr ; @@ -1760,8 +1741,7 @@ define arm_aapcs_vfpcc i8* @masked_v8f16_postinc(i8* %x, i8* %y, <8 x i16> %a) { ; CHECK-BE: @ %bb.0: @ %entry ; CHECK-BE-NEXT: vrev64.16 q1, q0 ; CHECK-BE-NEXT: vpt.s16 gt, q1, zr -; CHECK-BE-NEXT: vldrht.u16 q0, [r0] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vldrht.u16 q0, [r0], #4 ; CHECK-BE-NEXT: vstrh.16 q0, [r1] ; CHECK-BE-NEXT: bx lr entry: diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-store.ll b/llvm/test/CodeGen/Thumb2/mve-masked-store.ll index 1fc9793fd50d4..425162721acf4 100644 --- a/llvm/test/CodeGen/Thumb2/mve-masked-store.ll +++ b/llvm/test/CodeGen/Thumb2/mve-masked-store.ll @@ -111,8 +111,7 @@ define i8* @masked_v4i32_pre(i8* %y, i8* %x, <4 x i32> %a) { ; CHECK-LE-NEXT: vldrw.u32 q1, [r1] ; CHECK-LE-NEXT: vmov d0, r2, r3 ; CHECK-LE-NEXT: vpt.s32 gt, q0, zr -; CHECK-LE-NEXT: vstrwt.32 q1, [r0, #4] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrwt.32 q1, [r0, #4]! ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: masked_v4i32_pre: @@ -122,8 +121,7 @@ define i8* @masked_v4i32_pre(i8* %y, i8* %x, <4 x i32> %a) { ; CHECK-BE-NEXT: vmov d0, r3, r2 ; CHECK-BE-NEXT: vrev64.32 q2, q0 ; CHECK-BE-NEXT: vpt.s32 gt, q2, zr -; CHECK-BE-NEXT: vstrwt.32 q1, [r0, #4] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrwt.32 q1, [r0, #4]! 
; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -142,8 +140,7 @@ define i8* @masked_v4i32_post(i8* %y, i8* %x, <4 x i32> %a) { ; CHECK-LE-NEXT: vldrw.u32 q1, [r1] ; CHECK-LE-NEXT: vmov d0, r2, r3 ; CHECK-LE-NEXT: vpt.s32 gt, q0, zr -; CHECK-LE-NEXT: vstrwt.32 q1, [r0] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrwt.32 q1, [r0], #4 ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: masked_v4i32_post: @@ -153,8 +150,7 @@ define i8* @masked_v4i32_post(i8* %y, i8* %x, <4 x i32> %a) { ; CHECK-BE-NEXT: vmov d0, r3, r2 ; CHECK-BE-NEXT: vrev64.32 q2, q0 ; CHECK-BE-NEXT: vpt.s32 gt, q2, zr -; CHECK-BE-NEXT: vstrwt.32 q1, [r0] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrwt.32 q1, [r0], #4 ; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -334,8 +330,7 @@ define i8* @masked_v8i16_pre(i8* %y, i8* %x, <8 x i16> %a) { ; CHECK-LE-NEXT: vldrw.u32 q1, [r1] ; CHECK-LE-NEXT: vmov d0, r2, r3 ; CHECK-LE-NEXT: vpt.s16 gt, q0, zr -; CHECK-LE-NEXT: vstrht.16 q1, [r0, #4] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrht.16 q1, [r0, #4]! ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: masked_v8i16_pre: @@ -345,8 +340,7 @@ define i8* @masked_v8i16_pre(i8* %y, i8* %x, <8 x i16> %a) { ; CHECK-BE-NEXT: vmov d0, r3, r2 ; CHECK-BE-NEXT: vrev64.16 q2, q0 ; CHECK-BE-NEXT: vpt.s16 gt, q2, zr -; CHECK-BE-NEXT: vstrht.16 q1, [r0, #4] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrht.16 q1, [r0, #4]! 
; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -365,8 +359,7 @@ define i8* @masked_v8i16_post(i8* %y, i8* %x, <8 x i16> %a) { ; CHECK-LE-NEXT: vldrw.u32 q1, [r1] ; CHECK-LE-NEXT: vmov d0, r2, r3 ; CHECK-LE-NEXT: vpt.s16 gt, q0, zr -; CHECK-LE-NEXT: vstrht.16 q1, [r0] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrht.16 q1, [r0], #4 ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: masked_v8i16_post: @@ -376,8 +369,7 @@ define i8* @masked_v8i16_post(i8* %y, i8* %x, <8 x i16> %a) { ; CHECK-BE-NEXT: vmov d0, r3, r2 ; CHECK-BE-NEXT: vrev64.16 q2, q0 ; CHECK-BE-NEXT: vpt.s16 gt, q2, zr -; CHECK-BE-NEXT: vstrht.16 q1, [r0] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrht.16 q1, [r0], #4 ; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -416,8 +408,7 @@ define i8* @masked_v16i8_pre(i8* %y, i8* %x, <16 x i8> %a) { ; CHECK-LE-NEXT: vldrw.u32 q1, [r1] ; CHECK-LE-NEXT: vmov d0, r2, r3 ; CHECK-LE-NEXT: vpt.s8 gt, q0, zr -; CHECK-LE-NEXT: vstrbt.8 q1, [r0, #4] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrbt.8 q1, [r0, #4]! ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: masked_v16i8_pre: @@ -427,8 +418,7 @@ define i8* @masked_v16i8_pre(i8* %y, i8* %x, <16 x i8> %a) { ; CHECK-BE-NEXT: vmov d0, r3, r2 ; CHECK-BE-NEXT: vrev64.8 q2, q0 ; CHECK-BE-NEXT: vpt.s8 gt, q2, zr -; CHECK-BE-NEXT: vstrbt.8 q1, [r0, #4] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrbt.8 q1, [r0, #4]! 
; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -447,8 +437,7 @@ define i8* @masked_v16i8_post(i8* %y, i8* %x, <16 x i8> %a) { ; CHECK-LE-NEXT: vldrw.u32 q1, [r1] ; CHECK-LE-NEXT: vmov d0, r2, r3 ; CHECK-LE-NEXT: vpt.s8 gt, q0, zr -; CHECK-LE-NEXT: vstrbt.8 q1, [r0] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrbt.8 q1, [r0], #4 ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: masked_v16i8_post: @@ -458,8 +447,7 @@ define i8* @masked_v16i8_post(i8* %y, i8* %x, <16 x i8> %a) { ; CHECK-BE-NEXT: vmov d0, r3, r2 ; CHECK-BE-NEXT: vrev64.8 q2, q0 ; CHECK-BE-NEXT: vpt.s8 gt, q2, zr -; CHECK-BE-NEXT: vstrbt.8 q1, [r0] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrbt.8 q1, [r0], #4 ; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -591,8 +579,7 @@ define i8* @masked_v4f32_pre(i8* %y, i8* %x, <4 x i32> %a) { ; CHECK-LE-NEXT: vldrw.u32 q1, [r1] ; CHECK-LE-NEXT: vmov d0, r2, r3 ; CHECK-LE-NEXT: vpt.s32 gt, q0, zr -; CHECK-LE-NEXT: vstrwt.32 q1, [r0, #4] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrwt.32 q1, [r0, #4]! ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: masked_v4f32_pre: @@ -602,8 +589,7 @@ define i8* @masked_v4f32_pre(i8* %y, i8* %x, <4 x i32> %a) { ; CHECK-BE-NEXT: vmov d0, r3, r2 ; CHECK-BE-NEXT: vrev64.32 q2, q0 ; CHECK-BE-NEXT: vpt.s32 gt, q2, zr -; CHECK-BE-NEXT: vstrwt.32 q1, [r0, #4] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrwt.32 q1, [r0, #4]! 
; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -622,8 +608,7 @@ define i8* @masked_v4f32_post(i8* %y, i8* %x, <4 x i32> %a) { ; CHECK-LE-NEXT: vldrw.u32 q1, [r1] ; CHECK-LE-NEXT: vmov d0, r2, r3 ; CHECK-LE-NEXT: vpt.s32 gt, q0, zr -; CHECK-LE-NEXT: vstrwt.32 q1, [r0] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrwt.32 q1, [r0], #4 ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: masked_v4f32_post: @@ -633,8 +618,7 @@ define i8* @masked_v4f32_post(i8* %y, i8* %x, <4 x i32> %a) { ; CHECK-BE-NEXT: vmov d0, r3, r2 ; CHECK-BE-NEXT: vrev64.32 q2, q0 ; CHECK-BE-NEXT: vpt.s32 gt, q2, zr -; CHECK-BE-NEXT: vstrwt.32 q1, [r0] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrwt.32 q1, [r0], #4 ; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -904,8 +888,7 @@ define i8* @masked_v8f16_pre(i8* %y, i8* %x, <8 x i16> %a) { ; CHECK-LE-NEXT: vldrw.u32 q1, [r1] ; CHECK-LE-NEXT: vmov d0, r2, r3 ; CHECK-LE-NEXT: vpt.s16 gt, q0, zr -; CHECK-LE-NEXT: vstrht.16 q1, [r0, #4] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrht.16 q1, [r0, #4]! ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: masked_v8f16_pre: @@ -915,8 +898,7 @@ define i8* @masked_v8f16_pre(i8* %y, i8* %x, <8 x i16> %a) { ; CHECK-BE-NEXT: vmov d0, r3, r2 ; CHECK-BE-NEXT: vrev64.16 q2, q0 ; CHECK-BE-NEXT: vpt.s16 gt, q2, zr -; CHECK-BE-NEXT: vstrht.16 q1, [r0, #4] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrht.16 q1, [r0, #4]! 
; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -935,8 +917,7 @@ define i8* @masked_v8f16_post(i8* %y, i8* %x, <8 x i16> %a) { ; CHECK-LE-NEXT: vldrw.u32 q1, [r1] ; CHECK-LE-NEXT: vmov d0, r2, r3 ; CHECK-LE-NEXT: vpt.s16 gt, q0, zr -; CHECK-LE-NEXT: vstrht.16 q1, [r0] -; CHECK-LE-NEXT: adds r0, #4 +; CHECK-LE-NEXT: vstrht.16 q1, [r0], #4 ; CHECK-LE-NEXT: bx lr ; ; CHECK-BE-LABEL: masked_v8f16_post: @@ -946,8 +927,7 @@ define i8* @masked_v8f16_post(i8* %y, i8* %x, <8 x i16> %a) { ; CHECK-BE-NEXT: vmov d0, r3, r2 ; CHECK-BE-NEXT: vrev64.16 q2, q0 ; CHECK-BE-NEXT: vpt.s16 gt, q2, zr -; CHECK-BE-NEXT: vstrht.16 q1, [r0] -; CHECK-BE-NEXT: adds r0, #4 +; CHECK-BE-NEXT: vstrht.16 q1, [r0], #4 ; CHECK-BE-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 From 4965779f171343fc7e74276a548ec30906a740d0 Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 26 Nov 2019 16:18:58 +0000 Subject: [PATCH 058/591] [ARM] Clean up the load and store code. NFC Some of these patterns have grown quite organically. I've tried to organise them a little here, moving all the PatFlags together and giving them a more consistent naming scheme, to allow some of the later patterns to be merged into a single multiclass. 
Differential Revision: https://reviews.llvm.org/D70178 --- llvm/lib/Target/ARM/ARMInstrMVE.td | 509 ++++++++++++++--------------- 1 file changed, 246 insertions(+), 263 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index dd8c032dae423..31fee84596b3c 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -5288,65 +5288,7 @@ def MVE_LCTP : MVE_loltp_end<(outs), (ins pred:$p), "lctp${p}", ""> { // Patterns //===----------------------------------------------------------------------===// -class MVE_vector_store_typed - : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7:$addr), - (RegImmInst (Ty MQPR:$val), t2addrmode_imm7:$addr)>; -class MVE_vector_maskedstore_typed - : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7:$addr, VCCR:$pred), - (RegImmInst (Ty MQPR:$val), t2addrmode_imm7:$addr, (i32 1), VCCR:$pred)>; - -multiclass MVE_vector_store { - def : MVE_vector_store_typed; - def : MVE_vector_store_typed; - def : MVE_vector_store_typed; - def : MVE_vector_store_typed; - def : MVE_vector_store_typed; - def : MVE_vector_store_typed; - def : MVE_vector_store_typed; -} - -class MVE_vector_load_typed - : Pat<(Ty (LoadKind t2addrmode_imm7:$addr)), - (Ty (RegImmInst t2addrmode_imm7:$addr))>; -class MVE_vector_maskedload_typed - : Pat<(Ty (LoadKind t2addrmode_imm7:$addr, VCCR:$pred, (Ty NEONimmAllZerosV))), - (Ty (RegImmInst t2addrmode_imm7:$addr, (i32 1), VCCR:$pred))>; - -multiclass MVE_vector_load { - def : MVE_vector_load_typed; - def : MVE_vector_load_typed; - def : MVE_vector_load_typed; - def : MVE_vector_load_typed; - def : MVE_vector_load_typed; - def : MVE_vector_load_typed; - def : MVE_vector_load_typed; -} - -class MVE_vector_offset_store_typed - : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset:$addr), - (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset:$addr)>; -class MVE_vector_offset_maskedstore_typed - : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset:$addr, 
VCCR:$pred), - (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset:$addr, (i32 1), VCCR:$pred)>; - -multiclass MVE_vector_offset_store { - def : MVE_vector_offset_store_typed; - def : MVE_vector_offset_store_typed; - def : MVE_vector_offset_store_typed; - def : MVE_vector_offset_store_typed; - def : MVE_vector_offset_store_typed; - def : MVE_vector_offset_store_typed; - def : MVE_vector_offset_store_typed; -} +// PatFrags for loads and stores. Often trying to keep semi-consistent names. def aligned32_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), (pre_store node:$val, node:$ptr, node:$offset), [{ @@ -5366,79 +5308,69 @@ def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), }]>; -def maskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{ +def aligned_maskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{ auto *Ld = cast(N); return Ld->getMemoryVT().getScalarType() == MVT::i8; }]>; -def sextmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (maskedload8 node:$ptr, node:$pred, node:$passthru), [{ +def aligned_sextmaskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (aligned_maskedloadvi8 node:$ptr, node:$pred, node:$passthru), [{ return cast(N)->getExtensionType() == ISD::SEXTLOAD; }]>; -def zextmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (maskedload8 node:$ptr, node:$pred, node:$passthru), [{ +def aligned_zextmaskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (aligned_maskedloadvi8 node:$ptr, node:$pred, node:$passthru), [{ return cast(N)->getExtensionType() == ISD::ZEXTLOAD; }]>; -def extmaskedload8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (maskedload8 node:$ptr, node:$pred, node:$passthru), [{ +def aligned_extmaskedloadvi8 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + 
(aligned_maskedloadvi8 node:$ptr, node:$pred, node:$passthru), [{ auto *Ld = cast(N); EVT ScalarVT = Ld->getMemoryVT().getScalarType(); return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD; }]>; -def alignedmaskedload16: PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{ +def aligned_maskedloadvi16: PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{ auto *Ld = cast(N); EVT ScalarVT = Ld->getMemoryVT().getScalarType(); return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && Ld->getAlignment() >= 2; }]>; -def sextmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{ +def aligned_sextmaskedloadvi16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (aligned_maskedloadvi16 node:$ptr, node:$pred, node:$passthru), [{ return cast(N)->getExtensionType() == ISD::SEXTLOAD; }]>; -def zextmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{ +def aligned_zextmaskedloadvi16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (aligned_maskedloadvi16 node:$ptr, node:$pred, node:$passthru), [{ return cast(N)->getExtensionType() == ISD::ZEXTLOAD; }]>; -def extmaskedload16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (alignedmaskedload16 node:$ptr, node:$pred, node:$passthru), [{ +def aligned_extmaskedloadvi16 : PatFrag<(ops node:$ptr, node:$pred, node:$passthru), + (aligned_maskedloadvi16 node:$ptr, node:$pred, node:$passthru), [{ auto *Ld = cast(N); EVT ScalarVT = Ld->getMemoryVT().getScalarType(); return ScalarVT.isInteger() && Ld->getExtensionType() == ISD::EXTLOAD; }]>; -def alignedmaskedload32: PatFrag<(ops node:$ptr, node:$pred, node:$passthru), - (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{ +def aligned_maskedloadvi32: PatFrag<(ops 
node:$ptr, node:$pred, node:$passthru), + (masked_ld node:$ptr, undef, node:$pred, node:$passthru), [{ auto *Ld = cast(N); EVT ScalarVT = Ld->getMemoryVT().getScalarType(); return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && Ld->getAlignment() >= 4; }]>; -def maskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (masked_st node:$val, node:$ptr, undef, node:$pred), [{ +def aligned_maskedstvi8 : PatFrag<(ops node:$val, node:$ptr, node:$pred), + (masked_st node:$val, node:$ptr, undef, node:$pred), [{ return cast(N)->getMemoryVT().getScalarType() == MVT::i8; }]>; -def truncatingmaskedstore8 : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (maskedstore8 node:$val, node:$ptr, node:$pred), [{ - return cast(N)->isTruncatingStore(); -}]>; -def maskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (masked_st node:$val, node:$ptr, undef, node:$pred), [{ +def aligned_maskedstvi16 : PatFrag<(ops node:$val, node:$ptr, node:$pred), + (masked_st node:$val, node:$ptr, undef, node:$pred), [{ auto *St = cast(N); EVT ScalarVT = St->getMemoryVT().getScalarType(); return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; }]>; - -def truncatingmaskedstore16 : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (maskedstore16 node:$val, node:$ptr, node:$pred), [{ - return cast(N)->isTruncatingStore(); -}]>; -def maskedstore32 : PatFrag<(ops node:$val, node:$ptr, node:$pred), - (masked_st node:$val, node:$ptr, undef, node:$pred), [{ +def aligned_maskedstvi32 : PatFrag<(ops node:$val, node:$ptr, node:$pred), + (masked_st node:$val, node:$ptr, undef, node:$pred), [{ auto *St = cast(N); EVT ScalarVT = St->getMemoryVT().getScalarType(); return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4; }]>; - def pre_maskedstore : PatFrag<(ops node:$val, node:$base, node:$offset, node:$mask), (masked_st node:$val, node:$base, node:$offset, node:$mask), [{ ISD::MemIndexedMode AM = cast(N)->getAddressingMode(); @@ -5449,24 
+5381,177 @@ def post_maskedstore : PatFrag<(ops node:$val, node:$base, node:$offset, node:$m ISD::MemIndexedMode AM = cast(N)->getAddressingMode(); return AM == ISD::POST_INC || AM == ISD::POST_DEC; }]>; -def aligned32_pre_maskedstore : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), - (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ - return cast(N)->getAlignment() >= 4; +def aligned_pre_maskedstorevi8 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), + (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i8; }]>; -def aligned32_post_maskedstore : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), - (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ - return cast(N)->getAlignment() >= 4; +def aligned_post_maskedstorevi8 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), + (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i8; }]>; -def aligned16_pre_maskedstore : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), - (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ - return cast(N)->getAlignment() >= 2; +def aligned_pre_maskedstorevi16 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), + (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ + auto *St = cast(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; }]>; -def aligned16_post_maskedstore : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), - (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ - return cast(N)->getAlignment() >= 2; +def aligned_post_maskedstorevi16 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), + (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ + 
auto *St = cast(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; +}]>; +def aligned_pre_maskedstorevi32 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), + (pre_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ + auto *St = cast(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4; +}]>; +def aligned_post_maskedstorevi32 : PatFrag<(ops node:$val, node:$ptr, node:$offset, node:$mask), + (post_maskedstore node:$val, node:$ptr, node:$offset, node:$mask), [{ + auto *St = cast(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i32 || ScalarVT == MVT::f32) && St->getAlignment() >= 4; }]>; +// PatFrags for "Aligned" extending / truncating + +def aligned_extloadvi8 : PatFrag<(ops node:$ptr), (extloadvi8 node:$ptr)>; +def aligned_sextloadvi8 : PatFrag<(ops node:$ptr), (sextloadvi8 node:$ptr)>; +def aligned_zextloadvi8 : PatFrag<(ops node:$ptr), (zextloadvi8 node:$ptr)>; + +def aligned_truncstvi8 : PatFrag<(ops node:$val, node:$ptr), + (truncstorevi8 node:$val, node:$ptr)>; +def aligned_post_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset), + (post_truncstvi8 node:$val, node:$base, node:$offset)>; +def aligned_pre_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset), + (pre_truncstvi8 node:$val, node:$base, node:$offset)>; + +let MinAlignment = 2 in { + def aligned_extloadvi16 : PatFrag<(ops node:$ptr), (extloadvi16 node:$ptr)>; + def aligned_sextloadvi16 : PatFrag<(ops node:$ptr), (sextloadvi16 node:$ptr)>; + def aligned_zextloadvi16 : PatFrag<(ops node:$ptr), (zextloadvi16 node:$ptr)>; + + def aligned_truncstvi16 : PatFrag<(ops node:$val, node:$ptr), + (truncstorevi16 node:$val, node:$ptr)>; + def aligned_post_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset), + (post_truncstvi16 node:$val, node:$base, 
node:$offset)>; + def aligned_pre_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset), + (pre_truncstvi16 node:$val, node:$base, node:$offset)>; +} + +def truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$pred), + (masked_st node:$val, node:$base, undef, node:$pred), [{ + return cast(N)->isTruncatingStore(); +}]>; +def aligned_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$pred), + (truncmaskedst node:$val, node:$base, node:$pred), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def aligned_truncmaskedstvi16 : PatFrag<(ops node:$val, node:$base, node:$pred), + (truncmaskedst node:$val, node:$base, node:$pred), [{ + auto *St = cast(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; +}]>; +def pre_truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred), + (masked_st node:$val, node:$base, node:$offset, node:$pred), [{ + ISD::MemIndexedMode AM = cast(N)->getAddressingMode(); + return cast(N)->isTruncatingStore() && (AM == ISD::PRE_INC || AM == ISD::PRE_DEC); +}]>; +def aligned_pre_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred), + (pre_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def aligned_pre_truncmaskedstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred), + (pre_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{ + auto *St = cast(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; +}]>; +def post_truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd), + (masked_st node:$val, node:$base, node:$offset, node:$postd), [{ + ISD::MemIndexedMode AM = cast(N)->getAddressingMode(); + return cast(N)->isTruncatingStore() && (AM == ISD::POST_INC || 
AM == ISD::POST_DEC); +}]>; +def aligned_post_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd), + (post_truncmaskedst node:$val, node:$base, node:$offset, node:$postd), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def aligned_post_truncmaskedstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd), + (post_truncmaskedst node:$val, node:$base, node:$offset, node:$postd), [{ + auto *St = cast(N); + EVT ScalarVT = St->getMemoryVT().getScalarType(); + return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; +}]>; + +// Load/store patterns + +class MVE_vector_store_typed + : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7:$addr), + (RegImmInst (Ty MQPR:$val), t2addrmode_imm7:$addr)>; + +class MVE_vector_maskedstore_typed + : Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7:$addr, VCCR:$pred), + (RegImmInst (Ty MQPR:$val), t2addrmode_imm7:$addr, (i32 1), VCCR:$pred)>; + +multiclass MVE_vector_store { + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; + def : MVE_vector_store_typed; +} + +class MVE_vector_load_typed + : Pat<(Ty (LoadKind t2addrmode_imm7:$addr)), + (Ty (RegImmInst t2addrmode_imm7:$addr))>; + +class MVE_vector_maskedload_typed + : Pat<(Ty (LoadKind t2addrmode_imm7:$addr, VCCR:$pred, (Ty NEONimmAllZerosV))), + (Ty (RegImmInst t2addrmode_imm7:$addr, (i32 1), VCCR:$pred))>; + +multiclass MVE_vector_load { + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; + def : MVE_vector_load_typed; +} + +class MVE_vector_offset_store_typed + : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset:$addr), + (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset:$addr)>; + +class MVE_vector_offset_maskedstore_typed + 
: Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset:$addr, VCCR:$pred), + (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset:$addr, (i32 1), VCCR:$pred)>; + +multiclass MVE_vector_offset_store { + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; + def : MVE_vector_offset_store_typed; +} + + let Predicates = [HasMVEInt, IsLE] in { // Stores defm : MVE_vector_store; @@ -5543,175 +5628,73 @@ let Predicates = [HasMVEInt, IsBE] in { let Predicates = [HasMVEInt] in { // Aligned masked store, shared between LE and BE - def : MVE_vector_maskedstore_typed; - def : MVE_vector_maskedstore_typed; - def : MVE_vector_maskedstore_typed; - def : MVE_vector_maskedstore_typed; - def : MVE_vector_maskedstore_typed; + def : MVE_vector_maskedstore_typed; + def : MVE_vector_maskedstore_typed; + def : MVE_vector_maskedstore_typed; + def : MVE_vector_maskedstore_typed; + def : MVE_vector_maskedstore_typed; // Pre/Post inc masked stores - def : MVE_vector_offset_maskedstore_typed; - def : MVE_vector_offset_maskedstore_typed; - def : MVE_vector_offset_maskedstore_typed; - def : MVE_vector_offset_maskedstore_typed; - def : MVE_vector_offset_maskedstore_typed; - def : MVE_vector_offset_maskedstore_typed; - def : MVE_vector_offset_maskedstore_typed; - def : MVE_vector_offset_maskedstore_typed; - def : MVE_vector_offset_maskedstore_typed; - def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + def : MVE_vector_offset_maskedstore_typed; + def : 
MVE_vector_offset_maskedstore_typed; // Aligned masked loads - def : MVE_vector_maskedload_typed; - def : MVE_vector_maskedload_typed; - def : MVE_vector_maskedload_typed; - def : MVE_vector_maskedload_typed; - def : MVE_vector_maskedload_typed; - - // Extending masked loads. - def : Pat<(v8i16 (sextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, - (v8i16 NEONimmAllZerosV))), - (v8i16 (MVE_VLDRBS16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; - def : Pat<(v4i32 (sextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, - (v4i32 NEONimmAllZerosV))), - (v4i32 (MVE_VLDRBS32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; - def : Pat<(v8i16 (zextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, - (v8i16 NEONimmAllZerosV))), - (v8i16 (MVE_VLDRBU16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; - def : Pat<(v4i32 (zextmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, - (v4i32 NEONimmAllZerosV))), - (v4i32 (MVE_VLDRBU32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; - def : Pat<(v8i16 (extmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, - (v8i16 NEONimmAllZerosV))), - (v8i16 (MVE_VLDRBU16 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; - def : Pat<(v4i32 (extmaskedload8 t2addrmode_imm7<0>:$addr, VCCR:$pred, - (v4i32 NEONimmAllZerosV))), - (v4i32 (MVE_VLDRBU32 t2addrmode_imm7<0>:$addr, (i32 1), VCCR:$pred))>; - def : Pat<(v4i32 (sextmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred, - (v4i32 NEONimmAllZerosV))), - (v4i32 (MVE_VLDRHS32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>; - def : Pat<(v4i32 (zextmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred, - (v4i32 NEONimmAllZerosV))), - (v4i32 (MVE_VLDRHU32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>; - def : Pat<(v4i32 (extmaskedload16 t2addrmode_imm7<1>:$addr, VCCR:$pred, - (v4i32 NEONimmAllZerosV))), - (v4i32 (MVE_VLDRHU32 t2addrmode_imm7<1>:$addr, (i32 1), VCCR:$pred))>; + def : MVE_vector_maskedload_typed; + def : MVE_vector_maskedload_typed; + def : MVE_vector_maskedload_typed; + def : 
MVE_vector_maskedload_typed; + def : MVE_vector_maskedload_typed; } // Widening/Narrowing Loads/Stores -let MinAlignment = 2 in { - def truncstorevi16_align2 : PatFrag<(ops node:$val, node:$ptr), - (truncstorevi16 node:$val, node:$ptr)>; - def post_truncstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset), - (post_truncstvi16 node:$val, node:$base, node:$offset)>; - def pre_truncstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset), - (pre_truncstvi16 node:$val, node:$base, node:$offset)>; -} - -def pre_truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred), - (masked_st node:$val, node:$base, node:$offset, node:$pred), [{ - ISD::MemIndexedMode AM = cast(N)->getAddressingMode(); - return AM == ISD::PRE_INC || AM == ISD::PRE_DEC; -}]>; -def pre_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred), - (pre_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{ - return cast(N)->getMemoryVT().getScalarType() == MVT::i8; -}]>; -def pre_truncmaskedstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$pred), - (pre_truncmaskedst node:$val, node:$base, node:$offset, node:$pred), [{ - auto *St = cast(N); - EVT ScalarVT = St->getMemoryVT().getScalarType(); - return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; -}]>; -def post_truncmaskedst : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd), - (masked_st node:$val, node:$base, node:$offset, node:$postd), [{ - ISD::MemIndexedMode AM = cast(N)->getAddressingMode(); - return AM == ISD::POST_INC || AM == ISD::POST_DEC; -}]>; -def post_truncmaskedstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd), - (post_truncmaskedst node:$val, node:$base, node:$offset, node:$postd), [{ - return cast(N)->getMemoryVT().getScalarType() == MVT::i8; -}]>; -def post_truncmaskedstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset, node:$postd), - (post_truncmaskedst 
node:$val, node:$base, node:$offset, node:$postd), [{ - auto *St = cast(N); - EVT ScalarVT = St->getMemoryVT().getScalarType(); - return (ScalarVT == MVT::i16 || ScalarVT == MVT::f16) && St->getAlignment() >= 2; -}]>; - -let Predicates = [HasMVEInt] in { - def : Pat<(truncstorevi8 (v8i16 MQPR:$val), taddrmode_imm7<0>:$addr), - (MVE_VSTRB16 MQPR:$val, taddrmode_imm7<0>:$addr)>; - def : Pat<(truncstorevi8 (v4i32 MQPR:$val), taddrmode_imm7<0>:$addr), - (MVE_VSTRB32 MQPR:$val, taddrmode_imm7<0>:$addr)>; - def : Pat<(truncstorevi16_align2 (v4i32 MQPR:$val), taddrmode_imm7<1>:$addr), - (MVE_VSTRH32 MQPR:$val, taddrmode_imm7<1>:$addr)>; - - def : Pat<(post_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), - (MVE_VSTRB16_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; - def : Pat<(post_truncstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), - (MVE_VSTRB32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; - def : Pat<(post_truncstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr), - (MVE_VSTRH32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>; - - def : Pat<(pre_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), - (MVE_VSTRB16_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; - def : Pat<(pre_truncstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), - (MVE_VSTRB32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; - def : Pat<(pre_truncstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr), - (MVE_VSTRH32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>; - - def : Pat<(truncatingmaskedstore8 (v8i16 MQPR:$val), taddrmode_imm7<0>:$addr, VCCR:$pred), - (MVE_VSTRB16 MQPR:$val, taddrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; - def : Pat<(truncatingmaskedstore8 (v4i32 MQPR:$val), taddrmode_imm7<0>:$addr, VCCR:$pred), - (MVE_VSTRB32 MQPR:$val, taddrmode_imm7<0>:$addr, (i32 1), VCCR:$pred)>; - def : Pat<(truncatingmaskedstore16 (v4i32 MQPR:$val), taddrmode_imm7<1>:$addr, 
VCCR:$pred), - (MVE_VSTRH32 MQPR:$val, taddrmode_imm7<1>:$addr, (i32 1), VCCR:$pred)>; - - def : Pat<(post_truncmaskedstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr, VCCR:$pred), - (MVE_VSTRB16_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr, (i32 1), VCCR:$pred)>; - def : Pat<(post_truncmaskedstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr, VCCR:$pred), - (MVE_VSTRB32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr, (i32 1), VCCR:$pred)>; - def : Pat<(post_truncmaskedstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr, VCCR:$pred), - (MVE_VSTRH32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr, (i32 1), VCCR:$pred)>; - - def : Pat<(pre_truncmaskedstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr, VCCR:$pred), - (MVE_VSTRB16_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr, (i32 1), VCCR:$pred)>; - def : Pat<(pre_truncmaskedstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr, VCCR:$pred), - (MVE_VSTRB32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr, (i32 1), VCCR:$pred)>; - def : Pat<(pre_truncmaskedstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr, VCCR:$pred), - (MVE_VSTRH32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr, (i32 1), VCCR:$pred)>; -} - - -let MinAlignment = 2 in { - def extloadvi16_align2 : PatFrag<(ops node:$ptr), (extloadvi16 node:$ptr)>; - def sextloadvi16_align2 : PatFrag<(ops node:$ptr), (sextloadvi16 node:$ptr)>; - def zextloadvi16_align2 : PatFrag<(ops node:$ptr), (zextloadvi16 node:$ptr)>; -} - -multiclass MVEExtLoad { - def _Any : Pat<(!cast("v" # DestLanes # "i" # DestElemBits) - (!cast("extloadvi" # SrcElemBits # Align) am:$addr)), - (!cast("MVE_VLDR" # SrcElemType # "U" # DestElemBits) - am:$addr)>; - def _Z : Pat<(!cast("v" # DestLanes # "i" # DestElemBits) - (!cast("zextloadvi" # SrcElemBits # Align) am:$addr)), - (!cast("MVE_VLDR" # SrcElemType # "U" # DestElemBits) - am:$addr)>; - def _S : Pat<(!cast("v" # DestLanes # "i" # DestElemBits) - 
(!cast("sextloadvi" # SrcElemBits # Align) am:$addr)), - (!cast("MVE_VLDR" # SrcElemType # "S" # DestElemBits) - am:$addr)>; +multiclass MVEExtLoadStore { + // Trunc stores + def : Pat<(!cast("aligned_truncst"#Amble) (VT MQPR:$val), taddrmode_imm7:$addr), + (!cast(StoreInst) MQPR:$val, taddrmode_imm7:$addr)>; + def : Pat<(!cast("aligned_post_truncst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset:$addr), + (!cast(StoreInst#"_post") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset:$addr)>; + def : Pat<(!cast("aligned_pre_truncst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset:$addr), + (!cast(StoreInst#"_pre") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset:$addr)>; + + // Masked trunc stores + def : Pat<(!cast("aligned_truncmaskedst"#Amble) (VT MQPR:$val), taddrmode_imm7:$addr, VCCR:$pred), + (!cast(StoreInst) MQPR:$val, taddrmode_imm7:$addr, (i32 1), VCCR:$pred)>; + def : Pat<(!cast("aligned_post_truncmaskedst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset:$addr, VCCR:$pred), + (!cast(StoreInst#"_post") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset:$addr, (i32 1), VCCR:$pred)>; + def : Pat<(!cast("aligned_pre_truncmaskedst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset:$addr, VCCR:$pred), + (!cast(StoreInst#"_pre") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset:$addr, (i32 1), VCCR:$pred)>; + + // Ext loads + def : Pat<(VT (!cast("aligned_extload"#Amble) taddrmode_imm7:$addr)), + (VT (LoadUInst taddrmode_imm7:$addr))>; + def : Pat<(VT (!cast("aligned_sextload"#Amble) taddrmode_imm7:$addr)), + (VT (LoadSInst taddrmode_imm7:$addr))>; + def : Pat<(VT (!cast("aligned_zextload"#Amble) taddrmode_imm7:$addr)), + (VT (LoadUInst taddrmode_imm7:$addr))>; + + // Masked ext loads + def : Pat<(VT (!cast("aligned_extmaskedload"#Amble) taddrmode_imm7:$addr, VCCR:$pred, (VT NEONimmAllZerosV))), + (VT (LoadUInst taddrmode_imm7:$addr, (i32 1), VCCR:$pred))>; + def : Pat<(VT (!cast("aligned_sextmaskedload"#Amble) taddrmode_imm7:$addr, VCCR:$pred, (VT NEONimmAllZerosV))), + (VT (LoadSInst taddrmode_imm7:$addr, (i32 
1), VCCR:$pred))>; + def : Pat<(VT (!cast("aligned_zextmaskedload"#Amble) taddrmode_imm7:$addr, VCCR:$pred, (VT NEONimmAllZerosV))), + (VT (LoadUInst taddrmode_imm7:$addr, (i32 1), VCCR:$pred))>; } let Predicates = [HasMVEInt] in { - defm : MVEExtLoad<"4", "32", "8", "B", "", taddrmode_imm7<0>>; - defm : MVEExtLoad<"8", "16", "8", "B", "", taddrmode_imm7<0>>; - defm : MVEExtLoad<"4", "32", "16", "H", "_align2", taddrmode_imm7<1>>; + defm : MVEExtLoadStore; + defm : MVEExtLoadStore; + defm : MVEExtLoadStore; } From 87c3f4a5e0bb53ca0d9799ca627e0897b10a82b3 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 26 Nov 2019 11:10:58 -0500 Subject: [PATCH 059/591] [OPENMP]Simplify printing of declare variant attribute, NFC. --- clang/include/clang/Basic/Attr.td | 51 +++++++++++++++++-------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 5d9e5dd59596c..21cf53f0a815e 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -3335,20 +3335,40 @@ def OMPDeclareVariant : InheritableAttr { } // TODO: add printing of real context selectors. 
OS << " match("; + int Used[OMP_CTX_SET_unknown] = {0}; for (unsigned I = 0, E = ctxSelectorSets_size(); I < E; ++I) { auto CtxSet = static_cast( *std::next(ctxSelectorSets_begin(), I)); - auto Ctx = static_cast( - *std::next(ctxSelectors_begin(), I)); - assert(CtxSet != OMP_CTX_SET_unknown && Ctx != OMP_CTX_unknown && - "Unknown context selector."); + if (Used[CtxSet]) + continue; + if (I > 0) + OS << ","; switch (CtxSet) { case OMP_CTX_SET_implementation: OS << "implementation={"; + break; + case OMP_CTX_SET_device: + OS << "device={"; + break; + case OMP_CTX_SET_unknown: + llvm_unreachable("Unknown context selector set."); + } + Used[CtxSet] = 1; + for (unsigned K = I, EK = ctxSelectors_size(); K < EK; ++K) { + auto CtxSetK = static_cast( + *std::next(ctxSelectorSets_begin(), K)); + if (CtxSet != CtxSetK) + continue; + if (K != I) + OS << ","; + auto Ctx = static_cast( + *std::next(ctxSelectors_begin(), K)); switch (Ctx) { case OMP_CTX_vendor: + assert(CtxSet == OMP_CTX_SET_implementation && + "Expected implementation context selector set."); OS << "vendor("; - printScore(OS, Policy, I); + printScore(OS, Policy, K); if (implVendors_size() > 0) { OS << *implVendors(). 
begin(); for (StringRef VendorName : llvm::drop_begin(implVendors(), 1)) @@ -3357,16 +3377,8 @@ def OMPDeclareVariant : InheritableAttr { OS << ")"; break; case OMP_CTX_kind: - llvm_unreachable("Unexpected context selector in implementation set."); - case OMP_CTX_unknown: - llvm_unreachable("Unknown context selector."); - } - OS << "}"; - break; - case OMP_CTX_SET_device: - OS << "device={"; - switch (Ctx) { - case OMP_CTX_kind: + assert(CtxSet == OMP_CTX_SET_device && + "Expected device context selector set."); OS << "kind("; if (deviceKinds_size() > 0) { OS << *deviceKinds().begin(); @@ -3375,18 +3387,11 @@ def OMPDeclareVariant : InheritableAttr { } OS << ")"; break; - case OMP_CTX_vendor: - llvm_unreachable("Unexpected context selector in device set."); case OMP_CTX_unknown: llvm_unreachable("Unknown context selector."); } - OS << "}"; - break; - case OMP_CTX_SET_unknown: - llvm_unreachable("Unknown context selector set."); } - if (I != E - 1) - OS << ","; + OS << "}"; } OS << ")"; } From 49a2b2a3d2c869cb10407c480fff2f832e080018 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 26 Nov 2019 11:37:36 -0500 Subject: [PATCH 060/591] [OPENMP]Remove tab in message, NFC. 
--- clang/include/clang/Basic/DiagnosticSemaKinds.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index c19862addec91..746320fa526b0 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -9292,7 +9292,7 @@ def ext_omp_loop_not_canonical_init : ExtWarn< "('var = init' or 'T var = init')">, InGroup; def err_omp_loop_not_canonical_cond : Error< "condition of OpenMP for loop must be a relational comparison " - "('<', '<=', '>', %select{or '>='|'>=', or '!='}0) of loop variable %1">; + "('<', '<=', '>', %select{or '>='|'>=', or '!='}0) of loop variable %1">; def err_omp_loop_not_canonical_incr : Error< "increment clause of OpenMP for loop must perform simple addition " "or subtraction on loop variable %0">; From a2fc96441788fba1e4709d63677f34ed8e321dae Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 20 Nov 2019 11:16:15 -0800 Subject: [PATCH 061/591] [ELF] Replace SymbolTable::forEachSymbol with iterator_range symbols() D62381 introduced forEachSymbol(). It seems that many call sites cannot be parallelized because the body shared some states. Replace forEachSymbol with iterator_range> symbols() to simplify code and improve debuggability (std::function calls take some frames). It also allows us to use early return to simplify code added in D69650. 
Reviewed By: grimar Differential Revision: https://reviews.llvm.org/D70505 --- lld/ELF/Driver.cpp | 19 +++++++++---------- lld/ELF/LTO.cpp | 4 ++-- lld/ELF/MarkLive.cpp | 6 ++---- lld/ELF/Relocations.cpp | 7 ++++--- lld/ELF/SymbolTable.h | 16 ++++++++++------ lld/ELF/Writer.cpp | 16 +++++++--------- 6 files changed, 34 insertions(+), 34 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index b13bb5e00def3..a0987259d24ba 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1408,13 +1408,13 @@ static void handleUndefinedGlob(StringRef arg) { } std::vector syms; - symtab->forEachSymbol([&](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) { // Calling Sym->fetch() from here is not safe because it may // add new symbols to the symbol table, invalidating the // current iterator. So we just keep a note. if (pat->match(sym->getName())) syms.push_back(sym); - }); + } for (Symbol *sym : syms) handleUndefined(sym); @@ -1440,10 +1440,10 @@ static void handleLibcall(StringRef name) { // result, the passes after the symbol resolution won't see any // symbols of type CommonSymbol. static void replaceCommonSymbols() { - symtab->forEachSymbol([](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) { auto *s = dyn_cast(sym); if (!s) - return; + continue; auto *bss = make("COMMON", s->size, s->alignment); bss->file = s->file; @@ -1451,7 +1451,7 @@ static void replaceCommonSymbols() { inputSections.push_back(bss); s->replace(Defined{s->file, s->getName(), s->binding, s->stOther, s->type, /*value=*/0, s->size, bss}); - }); + } } // If all references to a DSO happen to be weak, the DSO is not added @@ -1459,15 +1459,15 @@ static void replaceCommonSymbols() { // created from the DSO. Otherwise, they become dangling references // that point to a non-existent DSO. 
static void demoteSharedSymbols() { - symtab->forEachSymbol([](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) { auto *s = dyn_cast(sym); if (!s || s->getFile().isNeeded) - return; + continue; bool used = s->used; s->replace(Undefined{nullptr, s->getName(), STB_WEAK, s->stOther, s->type}); s->used = used; - }); + } } // The section referred to by `s` is considered address-significant. Set the @@ -1503,10 +1503,9 @@ static void findKeepUniqueSections(opt::InputArgList &args) { // Symbols in the dynsym could be address-significant in other executables // or DSOs, so we conservatively mark them as address-significant. - symtab->forEachSymbol([&](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) if (sym->includeInDynsym()) markAddrsig(sym); - }); // Visit the address-significance table in each object file and mark each // referenced symbol as address-significant. diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index 6da409568c8b1..524d552b0b84d 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -145,12 +145,12 @@ BitcodeCompiler::BitcodeCompiler() { config->ltoPartitions); // Initialize usedStartStop. - symtab->forEachSymbol([&](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) { StringRef s = sym->getName(); for (StringRef prefix : {"__start_", "__stop_"}) if (s.startswith(prefix)) usedStartStop.insert(s.substr(prefix.size())); - }); + } } BitcodeCompiler::~BitcodeCompiler() = default; diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp index 62fb8fe83a2ef..bb0105c289282 100644 --- a/lld/ELF/MarkLive.cpp +++ b/lld/ELF/MarkLive.cpp @@ -219,10 +219,9 @@ template void MarkLive::run() { // Preserve externally-visible symbols if the symbols defined by this // file can interrupt other ELF file's symbols at runtime. - symtab->forEachSymbol([&](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) if (sym->includeInDynsym() && sym->partition == partition) markSymbol(sym); - }); // If this isn't the main partition, that's all that we need to preserve. 
if (partition != 1) { @@ -330,11 +329,10 @@ template void markLive() { sec->markLive(); // If a DSO defines a symbol referenced in a regular object, it is needed. - symtab->forEachSymbol([](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) if (auto *s = dyn_cast(sym)) if (s->isUsedInRegularObj && !s->isWeak()) s->getFile().isNeeded = true; - }); return; } diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index a4fc1ffbd1e72..80e1de24316fd 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -799,10 +799,11 @@ static const Symbol *getAlternativeSpelling(const Undefined &sym, break; } if (!s) - symtab->forEachSymbol([&](Symbol *sym) { - if (!s && canSuggestExternCForCXX(name, sym->getName())) + for (Symbol *sym : symtab->symbols()) + if (canSuggestExternCForCXX(name, sym->getName())) { s = sym; - }); + break; + } if (s) { pre_hint = " to declare "; post_hint = " as extern \"C\"?"; diff --git a/lld/ELF/SymbolTable.h b/lld/ELF/SymbolTable.h index d3be0cb6450f9..507af8d2be75d 100644 --- a/lld/ELF/SymbolTable.h +++ b/lld/ELF/SymbolTable.h @@ -32,15 +32,19 @@ namespace elf { // add*() functions, which are called by input files as they are parsed. There // is one add* function per symbol type. 
class SymbolTable { -public: - void wrap(Symbol *sym, Symbol *real, Symbol *wrap); + struct FilterOutPlaceholder { + bool operator()(Symbol *S) const { return !S->isPlaceholder(); } + }; + using iterator = llvm::filter_iterator::const_iterator, + FilterOutPlaceholder>; - void forEachSymbol(llvm::function_ref fn) { - for (Symbol *sym : symVector) - if (!sym->isPlaceholder()) - fn(sym); +public: + llvm::iterator_range symbols() const { + return llvm::make_filter_range(symVector, FilterOutPlaceholder()); } + void wrap(Symbol *sym, Symbol *real, Symbol *wrap); + Symbol *insert(StringRef name); Symbol *addSymbol(const Symbol &newSym); diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 3de1230150d64..ebca612f77af7 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1238,10 +1238,9 @@ static DenseMap buildSectionOrder() { // We want both global and local symbols. We get the global ones from the // symbol table and iterate the object files for the local ones. - symtab->forEachSymbol([&](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) if (!sym->isLazy()) addSym(*sym); - }); for (InputFile *file : objectFiles) for (Symbol *sym : file->getSymbols()) @@ -1734,8 +1733,8 @@ template void Writer::finalizeSections() { for (Partition &part : partitions) finalizeSynthetic(part.ehFrame); - symtab->forEachSymbol( - [](Symbol *s) { s->isPreemptible = computeIsPreemptible(*s); }); + for (Symbol *sym : symtab->symbols()) + sym->isPreemptible = computeIsPreemptible(*sym); // Change values of linker-script-defined symbols from placeholders (assigned // by declareSymbols) to actual definitions. 
@@ -1769,19 +1768,18 @@ template void Writer::finalizeSections() { return symtab->soNames.count(needed); }); - symtab->forEachSymbol([](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) if (sym->isUndefined() && !sym->isWeak()) if (auto *f = dyn_cast_or_null(sym->file)) if (f->allNeededIsKnown) error(toString(f) + ": undefined reference to " + toString(*sym)); - }); } // Now that we have defined all possible global symbols including linker- // synthesized ones. Visit all symbols to give the finishing touches. - symtab->forEachSymbol([](Symbol *sym) { + for (Symbol *sym : symtab->symbols()) { if (!includeInSymtab(*sym)) - return; + continue; if (in.symTab) in.symTab->addSymbol(sym); @@ -1791,7 +1789,7 @@ template void Writer::finalizeSections() { if (file->isNeeded && !sym->isUndefined()) addVerneed(sym); } - }); + } // We also need to scan the dynamic relocation tables of the other partitions // and add any referenced symbols to the partition's dynsym. From 54a366f5156edc34019d5f04fff6844848d87f99 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 20 Nov 2019 10:55:04 -0800 Subject: [PATCH 062/591] [ELF] Add a corrector for case mismatch problems Reviewed By: grimar, peter.smith Differential Revision: https://reviews.llvm.org/D70506 --- lld/ELF/Relocations.cpp | 8 ++++++++ lld/test/ELF/undef-spell-corrector.s | 10 ++++++++++ 2 files changed, 18 insertions(+) diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 80e1de24316fd..1b8dacb36627d 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -777,6 +777,14 @@ static const Symbol *getAlternativeSpelling(const Undefined &sym, return s; } + // Case mismatch, e.g. Foo vs FOO. + for (auto &it : map) + if (name.equals_lower(it.first)) + return it.second; + for (Symbol *sym : symtab->symbols()) + if (!sym->isUndefined() && name.equals_lower(sym->getName())) + return sym; + // The reference may be a mangled name while the definition is not. Suggest a // missing extern "C". 
if (name.startswith("_Z")) { diff --git a/lld/test/ELF/undef-spell-corrector.s b/lld/test/ELF/undef-spell-corrector.s index 174c8009cba8d..3ad2421a6cd63 100644 --- a/lld/test/ELF/undef-spell-corrector.s +++ b/lld/test/ELF/undef-spell-corrector.s @@ -63,6 +63,16 @@ # CONST-NEXT: >>> referenced by {{.*}} # CONST-NEXT: >>> did you mean: foo(int const*) +## Case mismatch. +# RUN: echo 'call _Z3FOOPKi' | llvm-mc -filetype=obj -triple=x86_64 - -o %t1.o +# RUN: not ld.lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=CASE %s +# RUN: echo '_Z3fooPKi: call _Z3FOOPKi' | llvm-mc -filetype=obj -triple=x86_64 - -o %t1.o +# RUN: not ld.lld %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=CASE %s + +# CASE: error: undefined symbol: FOO(int const*) +# CASE-NEXT: >>> referenced by {{.*}} +# CASE-NEXT: >>> did you mean: foo(int const*) + .globl _start, abcde, _Z3fooPKi _start: abcde: From cd9c915d2ac0e6660593c76c63827a4f4d3257d4 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 21 Nov 2019 17:05:27 -0800 Subject: [PATCH 063/591] [Object][RISCV][test] Improve DebugInfo/RISCV/relax-debug-frame.ll Reviewed By: luismarques Differential Revision: https://reviews.llvm.org/D70578 --- .../test/DebugInfo/RISCV/relax-debug-frame.ll | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/llvm/test/DebugInfo/RISCV/relax-debug-frame.ll b/llvm/test/DebugInfo/RISCV/relax-debug-frame.ll index 24ba037762a9e..4a767aaf62355 100644 --- a/llvm/test/DebugInfo/RISCV/relax-debug-frame.ll +++ b/llvm/test/DebugInfo/RISCV/relax-debug-frame.ll @@ -1,19 +1,15 @@ -; RUN: llc -filetype=obj -mtriple=riscv32 -mattr=+relax %s -o - \ -; RUN: | llvm-readobj -r | FileCheck -check-prefix=RELAX %s -; RUN: llc -filetype=obj -mtriple=riscv32 -mattr=+relax %s -o - \ -; RUN: | llvm-dwarfdump --debug-frame - 2>&1 \ +; RUN: llc -filetype=obj -mtriple=riscv32 -mattr=+relax %s -o %t.o +; RUN: llvm-readobj -r %t.o | FileCheck -check-prefix=RELAX %s +; RUN: llvm-dwarfdump 
--debug-frame %t.o 2>&1 \ ; RUN: | FileCheck -check-prefix=RELAX-DWARFDUMP %s ; -; RELAX: Section{{.*}}.rela.{{eh|debug}}_frame { -; RELAX-NOT: {{[}]}} -; RELAX-NOT: 0x0 R_RISCV_ADD32 -; RELAX-NOT: 0x0 R_RISCV_SUB32 -; RELAX-NOT: {{[}]}} -; RELAX: 0x20 R_RISCV_ADD32 -; RELAX: 0x20 R_RISCV_SUB32 -; RELAX-NOT: {{[}]}} -; RELAX: 0x39 R_RISCV_SET6 -; RELAX: 0x39 R_RISCV_SUB6 +; RELAX: Section ({{.*}}) .rela.eh_frame { +; RELAX-NEXT: 0x1C R_RISCV_32_PCREL - 0x0 +; RELAX-NEXT: 0x20 R_RISCV_ADD32 - 0x0 +; RELAX-NEXT: 0x20 R_RISCV_SUB32 - 0x0 +; RELAX-NOT: } +; RELAX: 0x39 R_RISCV_SET6 - 0x0 +; RELAX-NEXT: 0x39 R_RISCV_SUB6 - 0x0 ; ; RELAX-DWARFDUMP-NOT: error: failed to compute relocation ; RELAX-DWARFDUMP: CIE From b8cb73dd38664b6e4ff6bbb0a4143c6e209038f0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 26 Nov 2019 10:32:40 -0800 Subject: [PATCH 064/591] [X86] Pre-commit test modifications for D68857. NFC Patch by Chen Liu(LiuChen3) Differential Revision: https://reviews.llvm.org/D70706 --- llvm/test/CodeGen/X86/fp-strict-scalar.ll | 4 ++-- llvm/test/CodeGen/X86/fp80-strict-scalar.ll | 13 ++++--------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar.ll b/llvm/test/CodeGen/X86/fp-strict-scalar.ll index 8813047636ed2..8d2e7103cfc50 100644 --- a/llvm/test/CodeGen/X86/fp-strict-scalar.ll +++ b/llvm/test/CodeGen/X86/fp-strict-scalar.ll @@ -16,7 +16,7 @@ declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, me declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata) declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata) declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) -declare float @llvm.experimental.constrained.fptrunc.f64.f32(double, metadata, metadata) +declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) declare float 
@llvm.experimental.constrained.sqrt.f32(float, metadata, metadata) declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata) @@ -480,7 +480,7 @@ define void @fptrunc_double_to_f32(double* %val, float *%ret) nounwind strictfp ; X87-NEXT: popl %eax ; X87-NEXT: retl %1 = load double, double* %val, align 8 - %res = call float @llvm.experimental.constrained.fptrunc.f64.f32(double %1, + %res = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %1, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 store float %res, float* %ret, align 4 diff --git a/llvm/test/CodeGen/X86/fp80-strict-scalar.ll b/llvm/test/CodeGen/X86/fp80-strict-scalar.ll index 2795008632204..8b80c01540d36 100644 --- a/llvm/test/CodeGen/X86/fp80-strict-scalar.ll +++ b/llvm/test/CodeGen/X86/fp80-strict-scalar.ll @@ -9,8 +9,8 @@ declare x86_fp80 @llvm.experimental.constrained.fdiv.x86_fp80(x86_fp80, x86_fp80 declare x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f32(float, metadata) declare x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f64(double, metadata) declare x86_fp80 @llvm.experimental.constrained.sqrt.x86_fp80(x86_fp80, metadata, metadata) -declare float @llvm.experimental.constrained.fptrunc.x86_fp80.f32(x86_fp80, metadata, metadata) -declare double @llvm.experimental.constrained.fptrunc.x86_fp80.f64(x86_fp80, metadata, metadata) +declare float @llvm.experimental.constrained.fptrunc.f32.x86_fp80(x86_fp80, metadata, metadata) +declare double @llvm.experimental.constrained.fptrunc.f64.x86_fp80(x86_fp80, metadata, metadata) define x86_fp80 @fadd_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp { ; X86-LABEL: fadd_fp80: @@ -106,7 +106,6 @@ define x86_fp80 @fpext_f32_to_fp80(float %a) nounwind strictfp { %ret = call x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f32(float %a, metadata !"fpexcept.strict") #0 ret x86_fp80 %ret - } define x86_fp80 @fpext_f64_to_fp80(double %a) nounwind strictfp { @@ -123,7 +122,6 @@ define x86_fp80 
@fpext_f64_to_fp80(double %a) nounwind strictfp { %ret = call x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f64(double %a, metadata !"fpexcept.strict") #0 ret x86_fp80 %ret - } define float @fptrunc_fp80_to_f32(x86_fp80 %a) nounwind strictfp { @@ -142,11 +140,10 @@ define float @fptrunc_fp80_to_f32(x86_fp80 %a) nounwind strictfp { ; X64-NEXT: fstps -{{[0-9]+}}(%rsp) ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-NEXT: retq - %ret = call float @llvm.experimental.constrained.fptrunc.x86_fp80.f32(x86_fp80 %a, + %ret = call float @llvm.experimental.constrained.fptrunc.f32.x86_fp80(x86_fp80 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret float %ret - } define double @fptrunc_fp80_to_f64(x86_fp80 %a) nounwind strictfp { @@ -169,11 +166,10 @@ define double @fptrunc_fp80_to_f64(x86_fp80 %a) nounwind strictfp { ; X64-NEXT: fstpl -{{[0-9]+}}(%rsp) ; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X64-NEXT: retq - %ret = call double @llvm.experimental.constrained.fptrunc.x86_fp80.f64(x86_fp80 %a, + %ret = call double @llvm.experimental.constrained.fptrunc.f64.x86_fp80(x86_fp80 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret double %ret - } define x86_fp80 @fsqrt_fp80(x86_fp80 %a) nounwind strictfp { @@ -192,7 +188,6 @@ define x86_fp80 @fsqrt_fp80(x86_fp80 %a) nounwind strictfp { metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret x86_fp80 %ret - } attributes #0 = { strictfp } From cfce8f2cfba42edd3eb49e6b6484d60fb6aeeb43 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 26 Nov 2019 10:59:18 -0800 Subject: [PATCH 065/591] [X86] Add strict fp support for operations of X87 instructions This is the following patch of D68854. This patch adds basic operations of X87 instructions, including +, -, *, / , fp extensions and fp truncations. 
Patch by Chen Liu(LiuChen3) Differential Revision: https://reviews.llvm.org/D68857 --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 12 ++++++-- llvm/lib/Target/X86/X86ISelLowering.cpp | 20 ++++++++++++ llvm/lib/Target/X86/X86InstrFPStack.td | 34 ++++++++++----------- llvm/test/CodeGen/X86/fp-strict-scalar.ll | 2 +- llvm/test/CodeGen/X86/fp80-strict-scalar.ll | 4 +-- 5 files changed, 50 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index d8f9c5f7270d2..709ad90c22d7d 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -5222,12 +5222,20 @@ void X86DAGToDAGISel::Select(SDNode *Node) { } case ISD::STRICT_FADD: case ISD::STRICT_FSUB: + case ISD::STRICT_FP_ROUND: { + // X87 instructions has enabled these strict fp operation. + bool UsingFp80 = Node->getSimpleValueType(0) == MVT::f80 || + Node->getOperand(1).getSimpleValueType() == MVT::f80; + if (UsingFp80 || (!Subtarget->hasSSE1() && Subtarget->hasX87())) + break; + LLVM_FALLTHROUGH; + } case ISD::STRICT_FP_TO_SINT: case ISD::STRICT_FP_TO_UINT: - case ISD::STRICT_FP_ROUND: // FIXME: Remove when we have isel patterns for strict versions of these // nodes. - CurDAG->mutateStrictFPToFP(Node); + if (!TLI->isStrictFPEnabled()) + CurDAG->mutateStrictFPToFP(Node); break; } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 32072df268d3f..535493a832291 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -587,6 +587,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FSIN , VT, Expand); setOperationAction(ISD::FCOS , VT, Expand); setOperationAction(ISD::FSINCOS, VT, Expand); + + // Handle constrained floating-point operations of scalar. 
+ setOperationAction(ISD::STRICT_FMUL , VT, Legal); + setOperationAction(ISD::STRICT_FDIV , VT, Legal); + setOperationAction(ISD::STRICT_FSQRT , VT, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal); + // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten + // as Custom. + setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal); } } @@ -657,6 +666,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::LLROUND, MVT::f80, Expand); setOperationAction(ISD::LRINT, MVT::f80, Expand); setOperationAction(ISD::LLRINT, MVT::f80, Expand); + + // Handle constrained floating-point operations of scalar. + setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal); + setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal); + setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal); + setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal); + setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal); + // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten + // as Custom. + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal); } // f128 uses xmm registers, but most operations require libcalls. diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td index 1b7a2ccde51fa..d9cf560831300 100644 --- a/llvm/lib/Target/X86/X86InstrFPStack.td +++ b/llvm/lib/Target/X86/X86InstrFPStack.td @@ -286,26 +286,26 @@ let Uses = [FPCW], mayRaiseFPException = 1 in { // FPBinary_rr just defines pseudo-instructions, no need to set a scheduling // resources. let hasNoSchedulingInfo = 1 in { -defm ADD : FPBinary_rr; -defm SUB : FPBinary_rr; -defm MUL : FPBinary_rr; -defm DIV : FPBinary_rr; +defm ADD : FPBinary_rr; +defm SUB : FPBinary_rr; +defm MUL : FPBinary_rr; +defm DIV : FPBinary_rr; } // Sets the scheduling resources for the actual NAME#_Fm defintions. 
let SchedRW = [WriteFAddLd] in { -defm ADD : FPBinary; -defm SUB : FPBinary; -defm SUBR: FPBinary; +defm ADD : FPBinary; +defm SUB : FPBinary; +defm SUBR: FPBinary; } let SchedRW = [WriteFMulLd] in { -defm MUL : FPBinary; +defm MUL : FPBinary; } let SchedRW = [WriteFDivLd] in { -defm DIV : FPBinary; -defm DIVR: FPBinary; +defm DIV : FPBinary; +defm DIVR: FPBinary; } } // Uses = [FPCW], mayRaiseFPException = 1 @@ -366,7 +366,7 @@ defm ABS : FPUnary; let Uses = [FPCW], mayRaiseFPException = 1 in { let SchedRW = [WriteFSqrt80] in -defm SQRT: FPUnary; +defm SQRT: FPUnary; let SchedRW = [WriteFCom] in { let hasSideEffects = 0 in { @@ -790,19 +790,19 @@ def : Pat<(X86fist64 RFP80:$src, addr:$op), (IST_Fp64m80 addr:$op, RFP80:$src)>; // FP extensions map onto simple pseudo-value conversions if they are to/from // the FP stack. -def : Pat<(f64 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>, +def : Pat<(f64 (any_fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>, Requires<[FPStackf32]>; -def : Pat<(f80 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>, +def : Pat<(f80 (any_fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>, Requires<[FPStackf32]>; -def : Pat<(f80 (fpextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>, +def : Pat<(f80 (any_fpextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>, Requires<[FPStackf64]>; // FP truncations map onto simple pseudo-value conversions if they are to/from // the FP stack. We have validated that only value-preserving truncations make // it through isel. 
-def : Pat<(f32 (fpround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>, +def : Pat<(f32 (any_fpround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>, Requires<[FPStackf32]>; -def : Pat<(f32 (fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>, +def : Pat<(f32 (any_fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>, Requires<[FPStackf32]>; -def : Pat<(f64 (fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>, +def : Pat<(f64 (any_fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>, Requires<[FPStackf64]>; diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar.ll b/llvm/test/CodeGen/X86/fp-strict-scalar.ll index 8d2e7103cfc50..a61f195735ef9 100644 --- a/llvm/test/CodeGen/X86/fp-strict-scalar.ll +++ b/llvm/test/CodeGen/X86/fp-strict-scalar.ll @@ -5,7 +5,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X64 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X86 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X64 -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 | FileCheck %s --check-prefixes=CHECK,X87 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,X87 declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata) declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/fp80-strict-scalar.ll b/llvm/test/CodeGen/X86/fp80-strict-scalar.ll index 8b80c01540d36..e4fcf54e6950a 100644 --- a/llvm/test/CodeGen/X86/fp80-strict-scalar.ll +++ b/llvm/test/CodeGen/X86/fp80-strict-scalar.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -O3 | 
FileCheck %s --check-prefixes=CHECK,X86 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -O3 | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,X64 declare x86_fp80 @llvm.experimental.constrained.fadd.x86_fp80(x86_fp80, x86_fp80, metadata, metadata) declare x86_fp80 @llvm.experimental.constrained.fsub.x86_fp80(x86_fp80, x86_fp80, metadata, metadata) From fe955e6c70e8c26c605751da239a54cd31f8beee Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 26 Nov 2019 11:20:57 -0800 Subject: [PATCH 066/591] TargetPassConfig: const char * -> const char [] The latter has better codegen in non-optimized builds, which do not run ipsccp. --- llvm/lib/CodeGen/TargetPassConfig.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 7b547d41fb60b..41cb511ad9b47 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -179,10 +179,10 @@ static cl::opt UseCFLAA( /// Option names for limiting the codegen pipeline. /// Those are used in error reporting and we didn't want /// to duplicate their names all over the place. 
-static const char *StartAfterOptName = "start-after"; -static const char *StartBeforeOptName = "start-before"; -static const char *StopAfterOptName = "stop-after"; -static const char *StopBeforeOptName = "stop-before"; +static const char StartAfterOptName[] = "start-after"; +static const char StartBeforeOptName[] = "start-before"; +static const char StopAfterOptName[] = "stop-after"; +static const char StopBeforeOptName[] = "stop-before"; static cl::opt StartAfterOpt(StringRef(StartAfterOptName), From ee3b375b4cb7f3d7739f009e56d7ff1b07e27353 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 26 Nov 2019 11:57:45 -0800 Subject: [PATCH 067/591] [LegalizeDAG] Use getOperationAction instead of getStrictFPOperationAction for STRICT_LRINT/LROUND/LLRINT/LLROUND. --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 17bb98bdddfb5..f3316a0a303ec 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1023,8 +1023,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { // These pseudo-ops are the same as the other STRICT_ ops except // they are registered with setOperationAction() using the input type // instead of the output type. - Action = TLI.getStrictFPOperationAction(Node->getOpcode(), - Node->getOperand(1).getValueType()); + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getOperand(1).getValueType()); break; case ISD::SIGN_EXTEND_INREG: { EVT InnerType = cast(Node->getOperand(1))->getVT(); From a38fc61648797a10629ed160779b5df6b8d577e7 Mon Sep 17 00:00:00 2001 From: David Tenty Date: Tue, 26 Nov 2019 15:29:49 -0500 Subject: [PATCH 068/591] [AIX] Disable clang python binding tests Summary: The Python ctypes FFI interface is broken on AIX, it cannot properly pass structures containing arrays ( https://bugs.python.org/issue38628). 
So disable the clang python binding tests on AIX till this is resolved. Reviewers: stevewan, jasonliu, hubert.reinterpretcast, mgorny Reviewed By: jasonliu, hubert.reinterpretcast Subscribers: mgorny, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D70675 --- clang/bindings/python/tests/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/clang/bindings/python/tests/CMakeLists.txt b/clang/bindings/python/tests/CMakeLists.txt index 3f5ac957f81d4..626256af9c1b6 100644 --- a/clang/bindings/python/tests/CMakeLists.txt +++ b/clang/bindings/python/tests/CMakeLists.txt @@ -32,6 +32,11 @@ if(WIN32) set(RUN_PYTHON_TESTS FALSE) endif() +# The Python FFI interface is broken on AIX: https://bugs.python.org/issue38628. +if(${CMAKE_SYSTEM_NAME} MATCHES "AIX") + set(RUN_PYTHON_TESTS FALSE) +endif() + # AArch64, Hexagon, and Sparc have known test failures that need to be # addressed. # SystemZ has broken Python/FFI interface: From 9b08366f57468e1ca41906baae4dcf17fc86a442 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 26 Nov 2019 12:52:17 -0800 Subject: [PATCH 069/591] [LegalizeTypes] Add SoftenFloatRes_Unary and SoftenFloatRes_Binary functions to factor repeated patterns out of many of the SoftenFloatRes_* functions This has been factored out of D70654 which will add strict FP support to these functions. By making the helpers we avoid repeating even more code. 
Differential Revision: https://reviews.llvm.org/D70736 --- .../SelectionDAG/LegalizeFloatTypes.cpp | 436 +++++++----------- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2 + 2 files changed, 157 insertions(+), 281 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 8dbff7d273571..1ddf49c73e866 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -112,6 +112,28 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { } } +SDValue DAGTypeLegalizer::SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + assert(N->getNumOperands() == 1 && "Unexpected number of operands!"); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpVT = N->getOperand(0).getValueType(); + CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + assert(N->getNumOperands() == 2 && "Unexpected number of operands!"); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, NVT, Ops, CallOptions, SDLoc(N)).first; +} + SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { return BitConvertToInteger(N->getOperand(0)); } @@ -175,54 +197,30 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) 
{ - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::FMIN_F32, - RTLIB::FMIN_F64, - RTLIB::FMIN_F80, - RTLIB::FMIN_F128, - RTLIB::FMIN_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::FMIN_F32, + RTLIB::FMIN_F64, + RTLIB::FMIN_F80, + RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::FMAX_F32, - RTLIB::FMAX_F64, - RTLIB::FMAX_F80, - RTLIB::FMAX_F128, - RTLIB::FMAX_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::FMAX_F32, + RTLIB::FMAX_F64, + RTLIB::FMAX_F80, + RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - 
CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::ADD_F32, - RTLIB::ADD_F64, - RTLIB::ADD_F80, - RTLIB::ADD_F128, - RTLIB::ADD_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, + RTLIB::ADD_F64, + RTLIB::ADD_F80, + RTLIB::ADD_F128, + RTLIB::ADD_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FCBRT(SDNode *N) { @@ -241,18 +239,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCBRT(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::CEIL_F32, - RTLIB::CEIL_F64, - RTLIB::CEIL_F80, - RTLIB::CEIL_F128, - RTLIB::CEIL_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, + RTLIB::CEIL_F64, + RTLIB::CEIL_F80, + RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { @@ -304,125 +296,75 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::COS_F32, - RTLIB::COS_F64, - RTLIB::COS_F80, - RTLIB::COS_F128, - RTLIB::COS_PPCF128), - 
NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, + RTLIB::COS_F64, + RTLIB::COS_F80, + RTLIB::COS_F128, + RTLIB::COS_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::DIV_F32, - RTLIB::DIV_F64, - RTLIB::DIV_F80, - RTLIB::DIV_F128, - RTLIB::DIV_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_F128, + RTLIB::DIV_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::EXP_F32, - RTLIB::EXP_F64, - RTLIB::EXP_F80, - RTLIB::EXP_F128, - RTLIB::EXP_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, + RTLIB::EXP_F64, + RTLIB::EXP_F80, + RTLIB::EXP_F128, + RTLIB::EXP_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { 
N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::EXP2_F32, - RTLIB::EXP2_F64, - RTLIB::EXP2_F80, - RTLIB::EXP2_F128, - RTLIB::EXP2_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, + RTLIB::EXP2_F64, + RTLIB::EXP2_F80, + RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::FLOOR_F32, - RTLIB::FLOOR_F64, - RTLIB::FLOOR_F80, - RTLIB::FLOOR_F128, - RTLIB::FLOOR_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32, + RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, + RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::LOG_F32, - RTLIB::LOG_F64, - RTLIB::LOG_F80, - RTLIB::LOG_F128, - RTLIB::LOG_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, + RTLIB::LOG_F64, + RTLIB::LOG_F80, + RTLIB::LOG_F128, + RTLIB::LOG_PPCF128)); } SDValue 
DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::LOG2_F32, - RTLIB::LOG2_F64, - RTLIB::LOG2_F80, - RTLIB::LOG2_F128, - RTLIB::LOG2_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::LOG2_F32, + RTLIB::LOG2_F64, + RTLIB::LOG2_F80, + RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::LOG10_F32, - RTLIB::LOG10_F64, - RTLIB::LOG10_F80, - RTLIB::LOG10_F128, - RTLIB::LOG10_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32, + RTLIB::LOG10_F64, + RTLIB::LOG10_F80, + RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { @@ -445,35 +387,21 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - 
CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::MUL_F32, - RTLIB::MUL_F64, - RTLIB::MUL_F80, - RTLIB::MUL_F128, - RTLIB::MUL_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_F128, + RTLIB::MUL_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::NEARBYINT_F32, - RTLIB::NEARBYINT_F64, - RTLIB::NEARBYINT_F80, - RTLIB::NEARBYINT_F128, - RTLIB::NEARBYINT_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { @@ -568,20 +496,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::POW_F32, - RTLIB::POW_F64, - RTLIB::POW_F80, - RTLIB::POW_F128, - RTLIB::POW_PPCF128), - NVT, Ops, 
CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, + RTLIB::POW_F64, + RTLIB::POW_F80, + RTLIB::POW_F128, + RTLIB::POW_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { @@ -610,112 +530,66 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::REM_F32, - RTLIB::REM_F64, - RTLIB::REM_F80, - RTLIB::REM_F128, - RTLIB::REM_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::REM_F32, + RTLIB::REM_F64, + RTLIB::REM_F80, + RTLIB::REM_F128, + RTLIB::REM_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::RINT_F32, - RTLIB::RINT_F64, - RTLIB::RINT_F80, - RTLIB::RINT_F128, - RTLIB::RINT_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, + RTLIB::RINT_F64, + RTLIB::RINT_F80, + RTLIB::RINT_F128, + RTLIB::RINT_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { - EVT NVT = 
TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::ROUND_F32, - RTLIB::ROUND_F64, - RTLIB::ROUND_F80, - RTLIB::ROUND_F128, - RTLIB::ROUND_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::SIN_F32, - RTLIB::SIN_F64, - RTLIB::SIN_F80, - RTLIB::SIN_F128, - RTLIB::SIN_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, + RTLIB::SIN_F64, + RTLIB::SIN_F80, + RTLIB::SIN_F128, + RTLIB::SIN_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::SQRT_F32, - RTLIB::SQRT_F64, - RTLIB::SQRT_F80, - RTLIB::SQRT_F128, - RTLIB::SQRT_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return 
SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, + RTLIB::SQRT_F64, + RTLIB::SQRT_F80, + RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::SUB_F32, - RTLIB::SUB_F64, - RTLIB::SUB_F80, - RTLIB::SUB_F128, - RTLIB::SUB_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_F128, + RTLIB::SUB_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::TRUNC_F32, - RTLIB::TRUNC_F64, - RTLIB::TRUNC_F80, - RTLIB::TRUNC_F128, - RTLIB::TRUNC_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, + RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, + RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { @@ -866,7 +740,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { return true; assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && - "Invalid operand promotion"); + 
"Invalid operand softening"); ReplaceValueWith(SDValue(N, 0), Res); return false; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 89410ccd857b0..755a6fe909878 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -483,6 +483,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { // Convert Float Results to Integer. void SoftenFloatResult(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC); + SDValue SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC); SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_BITCAST(SDNode *N); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); From 11074bfffee022fbbdca177a96dc2eaf2df6d936 Mon Sep 17 00:00:00 2001 From: Simon Atanasyan Date: Fri, 22 Nov 2019 01:33:46 +0300 Subject: [PATCH 070/591] [mips] Fix sc, scd, ll, lld instructions expanding There are a couple of bugs in the expansion of the sc, scd, ll, and lld instructions: 1. On R6 these instructions pack the immediate offset into a 9-bit field. Now if an immediate exceeds 9 bits the assembler does not perform expansion and just rejects such an instruction. 2. In 64-bit non-PIC code, if an operand is a symbol, the assembler generates an incorrect sequence of instructions. It uses R_MIPS_HI16 and R_MIPS_LO16 relocations and skips R_MIPS_HIGHEST and R_MIPS_HIGHER ones. To solve these problems this patch: - Introduces `mem_simm9_exp` to mark 9-bit memory immediate operands which require expansion. Probably later all `mem_simm9` operands will be able to migrate to `mem_simm9_exp` and we will rename it to `mem_simm9`. - Adds a new `OPERAND_MEM_SIMM9` operand type and assigns it to the `mem_simm9_exp`. That allows us to know the operand size in the `processInstruction` method and decide whether we need to expand the instruction. - Adds an `expandMem9Inst` method to expand instructions with a 9-bit memory immediate operand.
This method just loads the immediate into a "base" register used by the original instruction: sc $2, 256($sp) => addiu $1, $sp, 256 sc $2, 0($1) - Fix `expandMem16Inst` to support a correct set of relocations for symbol loading in case of 64-bit non-PIC code. ll $12, symbol => lui $12, 0 R_MIPS_HIGHEST symbol daddiu $12, $12, 0 R_MIPS_HIGHER symbol dsll $12, $12, 16 daddiu $12, $12, 0 R_MIPS_HI16 symbol dsll $12, $12, 16 ll $12, 0($12) R_MIPS_LO16 symbol - Fix `expandMem16Inst` to unify handling of 3 and 4 operand instructions. - Delete the now unused `MipsTargetStreamer::emitSCWithSymOffset` method. Task for next patches - implement expansion for other instructions that use the `mem_simm9` operand and other `mem_simm##` operands. Differential Revision: https://reviews.llvm.org/D70648 --- .../Target/Mips/AsmParser/MipsAsmParser.cpp | 169 +++++-- .../Target/Mips/MCTargetDesc/MipsBaseInfo.h | 7 + .../Mips/MCTargetDesc/MipsMCTargetDesc.cpp | 1 + .../Mips/MCTargetDesc/MipsTargetStreamer.cpp | 35 -- llvm/lib/Target/Mips/Mips32r6InstrInfo.td | 4 +- llvm/lib/Target/Mips/Mips64r6InstrInfo.td | 4 +- llvm/lib/Target/Mips/MipsInstrInfo.td | 7 + llvm/lib/Target/Mips/MipsTargetStreamer.h | 4 - llvm/test/MC/Mips/ll-expansion.s | 406 ++++++++++++++++ llvm/test/MC/Mips/lld-expansion.s | 188 ++++++++ llvm/test/MC/Mips/sc-expansion.s | 442 ++++++++++++++++-- llvm/test/MC/Mips/scd-expansion.s | 188 ++++++++ 12 files changed, 1325 insertions(+), 130 deletions(-) create mode 100644 llvm/test/MC/Mips/ll-expansion.s create mode 100644 llvm/test/MC/Mips/lld-expansion.s create mode 100644 llvm/test/MC/Mips/scd-expansion.s diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 7f52812179534..639ee2df96a9d 100644 --- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -252,8 +252,10 @@ class MipsAsmParser : public MCTargetAsmParser { bool expandUncondBranchMMPseudo(MCInst &Inst, SMLoc IDLoc, 
MCStreamer &Out, const MCSubtargetInfo *STI); - void expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, - const MCSubtargetInfo *STI, bool IsLoad); + void expandMem16Inst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI, bool IsLoad); + void expandMem9Inst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI, bool IsLoad); bool expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, const MCSubtargetInfo *STI); @@ -1824,11 +1826,14 @@ static bool needsExpandMemInst(MCInst &Inst) { const MCOperandInfo &OpInfo = MCID.OpInfo[NumOp - 1]; if (OpInfo.OperandType != MCOI::OPERAND_MEMORY && - OpInfo.OperandType != MCOI::OPERAND_UNKNOWN) + OpInfo.OperandType != MCOI::OPERAND_UNKNOWN && + OpInfo.OperandType != MipsII::OPERAND_MEM_SIMM9) return false; MCOperand &Op = Inst.getOperand(NumOp - 1); if (Op.isImm()) { + if (OpInfo.OperandType == MipsII::OPERAND_MEM_SIMM9) + return !isInt<9>(Op.getImm()); // Offset can't exceed 16bit value. return !isInt<16>(Op.getImm()); } @@ -2133,7 +2138,15 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, // Check the offset of memory operand, if it is a symbol // reference or immediate we may have to expand instructions. 
if (needsExpandMemInst(Inst)) { - expandMemInst(Inst, IDLoc, Out, STI, MCID.mayLoad()); + const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); + switch (MCID.OpInfo[MCID.getNumOperands() - 1].OperandType) { + case MipsII::OPERAND_MEM_SIMM9: + expandMem9Inst(Inst, IDLoc, Out, STI, MCID.mayLoad()); + break; + default: + expandMem16Inst(Inst, IDLoc, Out, STI, MCID.mayLoad()); + break; + } return getParser().hasPendingError(); } } @@ -3631,20 +3644,26 @@ bool MipsAsmParser::expandBranchImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, return false; } -void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, - const MCSubtargetInfo *STI, bool IsLoad) { - const MCOperand &DstRegOp = Inst.getOperand(0); +void MipsAsmParser::expandMem16Inst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI, bool IsLoad) { + unsigned NumOp = Inst.getNumOperands(); + assert((NumOp == 3 || NumOp == 4) && "unexpected operands number"); + unsigned StartOp = NumOp == 3 ? 0 : 1; + + const MCOperand &DstRegOp = Inst.getOperand(StartOp); assert(DstRegOp.isReg() && "expected register operand kind"); - const MCOperand &BaseRegOp = Inst.getOperand(1); + const MCOperand &BaseRegOp = Inst.getOperand(StartOp + 1); assert(BaseRegOp.isReg() && "expected register operand kind"); + const MCOperand &OffsetOp = Inst.getOperand(StartOp + 2); MipsTargetStreamer &TOut = getTargetStreamer(); + unsigned OpCode = Inst.getOpcode(); unsigned DstReg = DstRegOp.getReg(); unsigned BaseReg = BaseRegOp.getReg(); unsigned TmpReg = DstReg; - const MCInstrDesc &Desc = getInstDesc(Inst.getOpcode()); - int16_t DstRegClass = Desc.OpInfo[0].RegClass; + const MCInstrDesc &Desc = getInstDesc(OpCode); + int16_t DstRegClass = Desc.OpInfo[StartOp].RegClass; unsigned DstRegClassID = getContext().getRegisterInfo()->getRegClass(DstRegClass).getID(); bool IsGPR = (DstRegClassID == Mips::GPR32RegClassID) || @@ -3658,25 +3677,12 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, 
MCStreamer &Out, return; } - if (Inst.getNumOperands() > 3) { - const MCOperand &BaseRegOp = Inst.getOperand(2); - assert(BaseRegOp.isReg() && "expected register operand kind"); - const MCOperand &ExprOp = Inst.getOperand(3); - assert(ExprOp.isExpr() && "expected expression oprand kind"); - - unsigned BaseReg = BaseRegOp.getReg(); - const MCExpr *ExprOffset = ExprOp.getExpr(); - - MCOperand LoOperand = MCOperand::createExpr( - MipsMCExpr::create(MipsMCExpr::MEK_LO, ExprOffset, getContext())); - MCOperand HiOperand = MCOperand::createExpr( - MipsMCExpr::create(MipsMCExpr::MEK_HI, ExprOffset, getContext())); - TOut.emitSCWithSymOffset(Inst.getOpcode(), DstReg, BaseReg, HiOperand, - LoOperand, TmpReg, IDLoc, STI); - return; - } - - const MCOperand &OffsetOp = Inst.getOperand(2); + auto emitInstWithOffset = [&](const MCOperand &Off) { + if (NumOp == 3) + TOut.emitRRX(OpCode, DstReg, TmpReg, Off, IDLoc, STI); + else + TOut.emitRRRX(OpCode, DstReg, DstReg, TmpReg, Off, IDLoc, STI); + }; if (OffsetOp.isImm()) { int64_t LoOffset = OffsetOp.getImm() & 0xffff; @@ -3690,16 +3696,16 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, bool IsLargeOffset = HiOffset != 0; if (IsLargeOffset) { - bool Is32BitImm = (HiOffset >> 32) == 0; + bool Is32BitImm = isInt<32>(OffsetOp.getImm()); if (loadImmediate(HiOffset, TmpReg, Mips::NoRegister, Is32BitImm, true, IDLoc, Out, STI)) return; } if (BaseReg != Mips::ZERO && BaseReg != Mips::ZERO_64) - TOut.emitRRR(isGP64bit() ? Mips::DADDu : Mips::ADDu, TmpReg, TmpReg, - BaseReg, IDLoc, STI); - TOut.emitRRI(Inst.getOpcode(), DstReg, TmpReg, LoOffset, IDLoc, STI); + TOut.emitRRR(ABI.ArePtrs64bit() ? 
Mips::DADDu : Mips::ADDu, TmpReg, + TmpReg, BaseReg, IDLoc, STI); + emitInstWithOffset(MCOperand::createImm(int16_t(LoOffset))); return; } @@ -3723,26 +3729,41 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, loadAndAddSymbolAddress(Res.getSymA(), TmpReg, BaseReg, !ABI.ArePtrs64bit(), IDLoc, Out, STI); - TOut.emitRRI(Inst.getOpcode(), DstReg, TmpReg, Res.getConstant(), IDLoc, - STI); + emitInstWithOffset(MCOperand::createImm(int16_t(Res.getConstant()))); } else { // FIXME: Implement 64-bit case. // 1) lw $8, sym => lui $8, %hi(sym) // lw $8, %lo(sym)($8) // 2) sw $8, sym => lui $at, %hi(sym) // sw $8, %lo(sym)($at) - const MCExpr *ExprOffset = OffsetOp.getExpr(); + const MCExpr *OffExpr = OffsetOp.getExpr(); MCOperand LoOperand = MCOperand::createExpr( - MipsMCExpr::create(MipsMCExpr::MEK_LO, ExprOffset, getContext())); + MipsMCExpr::create(MipsMCExpr::MEK_LO, OffExpr, getContext())); MCOperand HiOperand = MCOperand::createExpr( - MipsMCExpr::create(MipsMCExpr::MEK_HI, ExprOffset, getContext())); - - // Generate the base address in TmpReg. - TOut.emitRX(Mips::LUi, TmpReg, HiOperand, IDLoc, STI); - if (BaseReg != Mips::ZERO) - TOut.emitRRR(Mips::ADDu, TmpReg, TmpReg, BaseReg, IDLoc, STI); - // Emit the load or store with the adjusted base and offset. 
- TOut.emitRRX(Inst.getOpcode(), DstReg, TmpReg, LoOperand, IDLoc, STI); + MipsMCExpr::create(MipsMCExpr::MEK_HI, OffExpr, getContext())); + + if (ABI.IsN64()) { + MCOperand HighestOperand = MCOperand::createExpr( + MipsMCExpr::create(MipsMCExpr::MEK_HIGHEST, OffExpr, getContext())); + MCOperand HigherOperand = MCOperand::createExpr( + MipsMCExpr::create(MipsMCExpr::MEK_HIGHER, OffExpr, getContext())); + + TOut.emitRX(Mips::LUi, TmpReg, HighestOperand, IDLoc, STI); + TOut.emitRRX(Mips::DADDiu, TmpReg, TmpReg, HigherOperand, IDLoc, STI); + TOut.emitRRI(Mips::DSLL, TmpReg, TmpReg, 16, IDLoc, STI); + TOut.emitRRX(Mips::DADDiu, TmpReg, TmpReg, HiOperand, IDLoc, STI); + TOut.emitRRI(Mips::DSLL, TmpReg, TmpReg, 16, IDLoc, STI); + if (BaseReg != Mips::ZERO && BaseReg != Mips::ZERO_64) + TOut.emitRRR(Mips::DADDu, TmpReg, TmpReg, BaseReg, IDLoc, STI); + emitInstWithOffset(LoOperand); + } else { + // Generate the base address in TmpReg. + TOut.emitRX(Mips::LUi, TmpReg, HiOperand, IDLoc, STI); + if (BaseReg != Mips::ZERO) + TOut.emitRRR(Mips::ADDu, TmpReg, TmpReg, BaseReg, IDLoc, STI); + // Emit the load or store with the adjusted base and offset. + emitInstWithOffset(LoOperand); + } } return; } @@ -3750,6 +3771,64 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, llvm_unreachable("unexpected operand type"); } +void MipsAsmParser::expandMem9Inst(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI, bool IsLoad) { + unsigned NumOp = Inst.getNumOperands(); + assert((NumOp == 3 || NumOp == 4) && "unexpected operands number"); + unsigned StartOp = NumOp == 3 ? 
0 : 1; + + const MCOperand &DstRegOp = Inst.getOperand(StartOp); + assert(DstRegOp.isReg() && "expected register operand kind"); + const MCOperand &BaseRegOp = Inst.getOperand(StartOp + 1); + assert(BaseRegOp.isReg() && "expected register operand kind"); + const MCOperand &OffsetOp = Inst.getOperand(StartOp + 2); + + MipsTargetStreamer &TOut = getTargetStreamer(); + unsigned OpCode = Inst.getOpcode(); + unsigned DstReg = DstRegOp.getReg(); + unsigned BaseReg = BaseRegOp.getReg(); + unsigned TmpReg = DstReg; + + const MCInstrDesc &Desc = getInstDesc(OpCode); + int16_t DstRegClass = Desc.OpInfo[StartOp].RegClass; + unsigned DstRegClassID = + getContext().getRegisterInfo()->getRegClass(DstRegClass).getID(); + bool IsGPR = (DstRegClassID == Mips::GPR32RegClassID) || + (DstRegClassID == Mips::GPR64RegClassID); + + if (!IsLoad || !IsGPR || (BaseReg == DstReg)) { + // At this point we need AT to perform the expansions + // and we exit if it is not available. + TmpReg = getATReg(IDLoc); + if (!TmpReg) + return; + } + + auto emitInst = [&]() { + if (NumOp == 3) + TOut.emitRRX(OpCode, DstReg, TmpReg, MCOperand::createImm(0), IDLoc, STI); + else + TOut.emitRRRX(OpCode, DstReg, DstReg, TmpReg, MCOperand::createImm(0), + IDLoc, STI); + }; + + if (OffsetOp.isImm()) { + loadImmediate(OffsetOp.getImm(), TmpReg, BaseReg, !ABI.ArePtrs64bit(), true, + IDLoc, Out, STI); + emitInst(); + return; + } + + if (OffsetOp.isExpr()) { + loadAndAddSymbolAddress(OffsetOp.getExpr(), TmpReg, BaseReg, + !ABI.ArePtrs64bit(), IDLoc, Out, STI); + emitInst(); + return; + } + + llvm_unreachable("unexpected operand type"); +} + bool MipsAsmParser::expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, const MCSubtargetInfo *STI) { diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h index 3c11edfc3fc78..02ab5ede2c1a4 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h 
@@ -16,6 +16,7 @@ #include "MipsFixupKinds.h" #include "MipsMCTargetDesc.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/ErrorHandling.h" @@ -127,6 +128,12 @@ namespace MipsII { HasFCCRegOperand = 1 << 6 }; + + enum OperandType : unsigned { + OPERAND_FIRST_MIPS_MEM_IMM = MCOI::OPERAND_FIRST_TARGET, + OPERAND_MEM_SIMM9 = OPERAND_FIRST_MIPS_MEM_IMM, + OPERAND_LAST_MIPS_MEM_IMM = OPERAND_MEM_SIMM9 + }; } } diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index d84e4eada6466..d0b3c204730fb 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -12,6 +12,7 @@ #include "MipsMCTargetDesc.h" #include "MipsAsmBackend.h" +#include "MipsBaseInfo.h" #include "MipsELFStreamer.h" #include "MipsInstPrinter.h" #include "MipsMCAsmInfo.h" diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index b6dae9f6dea82..054dc79f4aa91 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -34,11 +34,6 @@ static cl::opt RoundSectionSizes( cl::desc("Round section sizes up to the section alignment"), cl::Hidden); } // end anonymous namespace -static bool isMipsR6(const MCSubtargetInfo *STI) { - return STI->getFeatureBits()[Mips::FeatureMips32r6] || - STI->getFeatureBits()[Mips::FeatureMips64r6]; -} - static bool isMicroMips(const MCSubtargetInfo *STI) { return STI->getFeatureBits()[Mips::FeatureMicroMips]; } @@ -332,36 +327,6 @@ void MipsTargetStreamer::emitStoreWithImmOffset( emitRRI(Opcode, SrcReg, ATReg, LoOffset, IDLoc, STI); } -/// Emit a store instruction with an symbol offset. 
-void MipsTargetStreamer::emitSCWithSymOffset(unsigned Opcode, unsigned SrcReg, - unsigned BaseReg, - MCOperand &HiOperand, - MCOperand &LoOperand, - unsigned ATReg, SMLoc IDLoc, - const MCSubtargetInfo *STI) { - // sc $8, sym => lui $at, %hi(sym) - // sc $8, %lo(sym)($at) - - // Generate the base address in ATReg. - emitRX(Mips::LUi, ATReg, HiOperand, IDLoc, STI); - if (!isMicroMips(STI) && isMipsR6(STI)) { - // For non-micromips r6 offset for 'sc' is not in the lower 16 bits so we - // put it in 'at'. - // sc $8, sym => lui $at, %hi(sym) - // addiu $at, $at, %lo(sym) - // sc $8, 0($at) - emitRRX(Mips::ADDiu, ATReg, ATReg, LoOperand, IDLoc, STI); - MCOperand Offset = MCOperand::createImm(0); - // Emit the store with the adjusted base and offset. - emitRRRX(Opcode, SrcReg, SrcReg, ATReg, Offset, IDLoc, STI); - } else { - if (BaseReg != Mips::ZERO) - emitRRR(Mips::ADDu, ATReg, ATReg, BaseReg, IDLoc, STI); - // Emit the store with the adjusted base and offset. - emitRRRX(Opcode, SrcReg, SrcReg, ATReg, LoOperand, IDLoc, STI); - } -} - /// Emit a load instruction with an immediate offset. DstReg and TmpReg are /// permitted to be the same register iff DstReg is distinct from BaseReg and /// DstReg is a GPR. 
It is the callers responsibility to identify such cases diff --git a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td index a735d45ddbfcf..9607d008bc979 100644 --- a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td @@ -765,12 +765,12 @@ class LL_R6_DESC_BASE; +class LL_R6_DESC : LL_R6_DESC_BASE<"ll", GPR32Opnd, mem_simm9_exp, II_LL>; class SC_R6_DESC_BASE { dag OutOperandList = (outs GPROpnd:$dst); - dag InOperandList = (ins GPROpnd:$rt, mem_simm9:$addr); + dag InOperandList = (ins GPROpnd:$rt, mem_simm9_exp:$addr); string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); list Pattern = []; bit mayStore = 1; diff --git a/llvm/lib/Target/Mips/Mips64r6InstrInfo.td b/llvm/lib/Target/Mips/Mips64r6InstrInfo.td index efebd77e531fe..33132d9ede92a 100644 --- a/llvm/lib/Target/Mips/Mips64r6InstrInfo.td +++ b/llvm/lib/Target/Mips/Mips64r6InstrInfo.td @@ -75,7 +75,7 @@ class DMUL_R6_DESC : MUL_R6_DESC_BASE<"dmul", GPR64Opnd, II_DMUL, mul>; class DMULU_DESC : MUL_R6_DESC_BASE<"dmulu", GPR64Opnd, II_DMUL>; class LDPC_DESC : PCREL_DESC_BASE<"ldpc", GPR64Opnd, simm18_lsl3, II_LDPC>; class LWUPC_DESC : PCREL_DESC_BASE<"lwupc", GPR32Opnd, simm19_lsl2, II_LWUPC>; -class LLD_R6_DESC : LL_R6_DESC_BASE<"lld", GPR64Opnd, mem_simmptr, II_LLD>; +class LLD_R6_DESC : LL_R6_DESC_BASE<"lld", GPR64Opnd, mem_simm9_exp, II_LLD>; class SCD_R6_DESC : SC_R6_DESC_BASE<"scd", GPR64Opnd, II_SCD>; class SELEQZ64_DESC : SELEQNE_Z_DESC_BASE<"seleqz", GPR64Opnd>; class SELNEZ64_DESC : SELEQNE_Z_DESC_BASE<"selnez", GPR64Opnd>; @@ -106,7 +106,7 @@ class JIC64_DESC : JMP_IDX_COMPACT_DESC_BASE<"jic", jmpoffset16, GPR64Opnd, list Defs = [AT]; } -class LL64_R6_DESC : LL_R6_DESC_BASE<"ll", GPR32Opnd, mem_simm9, II_LL>; +class LL64_R6_DESC : LL_R6_DESC_BASE<"ll", GPR32Opnd, mem_simm9_exp, II_LL>; class SC64_R6_DESC : SC_R6_DESC_BASE<"sc", GPR32Opnd, II_SC>; class JR_HB64_R6_DESC : JR_HB_DESC_BASE<"jr.hb", GPR64Opnd> { diff --git 
a/llvm/lib/Target/Mips/MipsInstrInfo.td b/llvm/lib/Target/Mips/MipsInstrInfo.td index da8be7c640b8b..3b626383d1d5a 100644 --- a/llvm/lib/Target/Mips/MipsInstrInfo.td +++ b/llvm/lib/Target/Mips/MipsInstrInfo.td @@ -1140,6 +1140,13 @@ def simm12 : Operand { let DecoderMethod = "DecodeSimm12"; } +def mem_simm9_exp : mem_generic { + let MIOperandInfo = (ops ptr_rc, simm9); + let ParserMatchClass = MipsMemSimmPtrAsmOperand; + let OperandNamespace = "MipsII"; + let OperandType = "OPERAND_MEM_SIMM9"; +} + foreach I = {9, 10, 11, 12, 16} in def mem_simm # I : mem_generic { let MIOperandInfo = (ops ptr_rc, !cast("simm" # I)); diff --git a/llvm/lib/Target/Mips/MipsTargetStreamer.h b/llvm/lib/Target/Mips/MipsTargetStreamer.h index 298d056ce2c35..b389ba8938c4b 100644 --- a/llvm/lib/Target/Mips/MipsTargetStreamer.h +++ b/llvm/lib/Target/Mips/MipsTargetStreamer.h @@ -156,10 +156,6 @@ class MipsTargetStreamer : public MCTargetStreamer { unsigned BaseReg, int64_t Offset, function_ref GetATReg, SMLoc IDLoc, const MCSubtargetInfo *STI); - void emitSCWithSymOffset(unsigned Opcode, unsigned SrcReg, unsigned BaseReg, - MCOperand &HiOperand, MCOperand &LoOperand, - unsigned ATReg, SMLoc IDLoc, - const MCSubtargetInfo *STI); void emitLoadWithImmOffset(unsigned Opcode, unsigned DstReg, unsigned BaseReg, int64_t Offset, unsigned TmpReg, SMLoc IDLoc, const MCSubtargetInfo *STI); diff --git a/llvm/test/MC/Mips/ll-expansion.s b/llvm/test/MC/Mips/ll-expansion.s new file mode 100644 index 0000000000000..4653a33d7e787 --- /dev/null +++ b/llvm/test/MC/Mips/ll-expansion.s @@ -0,0 +1,406 @@ +# RUN: llvm-mc -filetype=obj -triple mips -mcpu=mips2 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS32 +# RUN: llvm-mc -filetype=obj -triple mips -mcpu=mips32 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS32 +# RUN: llvm-mc -filetype=obj -triple mips -mcpu=mips32r2 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS32 +# RUN: 
llvm-mc -filetype=obj -triple mipsn32 -mcpu=mips3 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPSN32 +# RUN: llvm-mc -filetype=obj -triple mipsn32 -mcpu=mips64r6 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPSN32R6 +# RUN: llvm-mc -filetype=obj -triple mips64 -mcpu=mips64 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS64 +# RUN: llvm-mc -filetype=obj -triple mips64 -mcpu=mips64r2 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS64 +# RUN: llvm-mc -filetype=obj -triple mips -mcpu=mips32r6 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS32R6 +# RUN: llvm-mc -filetype=obj -triple mips64 -mcpu=mips64r6 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS64R6 + +ll $2, 128($sp) +# MIPS32: c3 a2 00 80 ll $2, 128($sp) +# MIPS32R6: 7f a2 40 36 ll $2, 128($sp) +# MIPSN32: c3 a2 00 80 ll $2, 128($sp) +# MIPSN32R6: 7f a2 40 36 ll $2, 128($sp) +# MIPS64: c3 a2 00 80 ll $2, 128($sp) +# MIPS64R6: 7f a2 40 36 ll $2, 128($sp) + +ll $2, -128($sp) +# MIPS32: c3 a2 ff 80 ll $2, -128($sp) +# MIPS32R6: 7f a2 c0 36 ll $2, -128($sp) +# MIPSN32: c3 a2 ff 80 ll $2, -128($sp) +# MIPSN32R6: 7f a2 c0 36 ll $2, -128($sp) +# MIPS64: c3 a2 ff 80 ll $2, -128($sp) +# MIPS64R6: 7f a2 c0 36 ll $2, -128($sp) + +ll $2, 256($sp) +# MIPS32: c3 a2 01 00 ll $2, 256($sp) + +# MIPS32R6: 27 a2 01 00 addiu $2, $sp, 256 +# MIPS32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPSN32: c3 a2 01 00 ll $2, 256($sp) + +# MIPSN32R6: 27 a2 01 00 addiu $2, $sp, 256 +# MIPSN32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPS64: c3 a2 01 00 ll $2, 256($sp) + +# MIPS64R6: 67 a2 01 00 daddiu $2, $sp, 256 +# MIPS64R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +ll $2, -257($sp) +# MIPS32: c3 a2 fe ff ll $2, -257($sp) + +# MIPS32R6: 27 a2 fe ff addiu $2, $sp, -257 +# MIPS32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPSN32: c3 a2 fe ff ll $2, -257($sp) + +# MIPSN32R6: 27 a2 fe ff addiu $2, $sp, 
-257 +# MIPSN32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPS64: c3 a2 fe ff ll $2, -257($sp) + +# MIPS64R6: 67 a2 fe ff daddiu $2, $sp, -257 +# MIPS64R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +ll $2, 32767($sp) +# MIPS32: c3 a2 7f ff ll $2, 32767($sp) + +# MIPS32R6: 27 a2 7f ff addiu $2, $sp, 32767 +# MIPS32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPSN32: c3 a2 7f ff ll $2, 32767($sp) + +# MIPSN32R6: 27 a2 7f ff addiu $2, $sp, 32767 +# MIPSN32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPS64: c3 a2 7f ff ll $2, 32767($sp) + +# MIPS64R6: 67 a2 7f ff daddiu $2, $sp, 32767 +# MIPS64R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +ll $2, 32768($sp) +# MIPS32: 3c 02 00 01 lui $2, 1 +# MIPS32-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPS32-NEXT: c0 42 80 00 ll $2, -32768($2) + +# MIPS32R6: 34 02 80 00 ori $2, $zero, 32768 +# MIPS32R6-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPS32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPSN32: 3c 02 00 01 lui $2, 1 +# MIPSN32-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPSN32-NEXT: c0 42 80 00 ll $2, -32768($2) + +# MIPSN32R6: 34 02 80 00 ori $2, $zero, 32768 +# MIPSN32R6-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPSN32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPS64: 3c 02 00 01 lui $2, 1 +# MIPS64-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64-NEXT: c0 42 80 00 ll $2, -32768($2) + +# MIPS64R6: 34 02 80 00 ori $2, $zero, 32768 +# MIPS64R6-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +ll $2, -32768($sp) +# MIPS32: c3 a2 80 00 ll $2, -32768($sp) + +# MIPS32R6: 27 a2 80 00 addiu $2, $sp, -32768 +# MIPS32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPSN32: c3 a2 80 00 ll $2, -32768($sp) + +# MIPSN32R6: 27 a2 80 00 addiu $2, $sp, -32768 +# MIPSN32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPS64: c3 a2 80 00 ll $2, -32768($sp) + +# MIPS64R6: 67 a2 80 00 daddiu $2, $sp, -32768 +# MIPS64R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +ll $2, -32769($sp) +# MIPS32: 3c 02 ff ff lui $2, 65535 +# MIPS32-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPS32-NEXT: c0 42 7f ff ll $2, 
32767($2) + +# MIPS32R6: 3c 02 ff ff aui $2, $zero, 65535 +# MIPS32R6-NEXT: 34 42 7f ff ori $2, $2, 32767 +# MIPS32R6-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPS32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPSN32: 3c 02 ff ff lui $2, 65535 +# MIPSN32-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPSN32-NEXT: c0 42 7f ff ll $2, 32767($2) + +# MIPSN32R6: 3c 02 ff ff aui $2, $zero, 65535 +# MIPSN32R6-NEXT: 34 42 7f ff ori $2, $2, 32767 +# MIPSN32R6-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPSN32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPS64: 3c 02 ff ff lui $2, 65535 +# MIPS64-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64-NEXT: c0 42 7f ff ll $2, 32767($2) + +# MIPS64R6: 3c 02 ff ff aui $2, $zero, 65535 +# MIPS64R6-NEXT: 34 42 7f ff ori $2, $2, 32767 +# MIPS64R6-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +ll $2, 655987($sp) +# MIPS32: 3c 02 00 0a lui $2, 10 +# MIPS32-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPS32-NEXT: c0 42 02 73 ll $2, 627($2) + +# MIPS32R6: 3c 02 00 0a aui $2, $zero, 10 +# MIPS32R6-NEXT: 34 42 02 73 ori $2, $2, 627 +# MIPS32R6-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPS32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPSN32: 3c 02 00 0a lui $2, 10 +# MIPSN32-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPSN32-NEXT: c0 42 02 73 ll $2, 627($2) + +# MIPSN32R6: 3c 02 00 0a aui $2, $zero, 10 +# MIPSN32R6-NEXT: 34 42 02 73 ori $2, $2, 627 +# MIPSN32R6-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPSN32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPS64: 3c 02 00 0a lui $2, 10 +# MIPS64-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64-NEXT: c0 42 02 73 ll $2, 627($2) + +# MIPS64R6: 3c 02 00 0a aui $2, $zero, 10 +# MIPS64R6-NEXT: 34 42 02 73 ori $2, $2, 627 +# MIPS64R6-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +ll $2, -655987($sp) +# MIPS32: 3c 02 ff f6 lui $2, 65526 +# MIPS32-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPS32-NEXT: c0 42 fd 8d ll $2, -627($2) + +# MIPS32R6: 3c 02 ff f5 aui $2, $zero, 65525 +# MIPS32R6-NEXT: 34 42 
fd 8d ori $2, $2, 64909 +# MIPS32R6-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPS32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPSN32: 3c 02 ff f6 lui $2, 65526 +# MIPSN32-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPSN32-NEXT: c0 42 fd 8d ll $2, -627($2) + +# MIPSN32R6: 3c 02 ff f5 aui $2, $zero, 65525 +# MIPSN32R6-NEXT: 34 42 fd 8d ori $2, $2, 64909 +# MIPSN32R6-NEXT: 00 5d 10 21 addu $2, $2, $sp +# MIPSN32R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +# MIPS64: 3c 02 ff f6 lui $2, 65526 +# MIPS64-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64-NEXT: c0 42 fd 8d ll $2, -627($2) + +# MIPS64R6: 3c 02 ff f5 aui $2, $zero, 65525 +# MIPS64R6-NEXT: 34 42 fd 8d ori $2, $2, 64909 +# MIPS64R6-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64R6-NEXT: 7c 42 00 36 ll $2, 0($2) + +ll $12, symbol +# MIPS32: 3c 0c 00 00 lui $12, 0 +# MIPS32-NEXT: R_MIPS_HI16 symbol +# MIPS32-NEXT: c1 8c 00 00 ll $12, 0($12) +# MIPS32-NEXT: R_MIPS_LO16 symbol + +# MIPS32R6: 3c 0c 00 00 aui $12, $zero, 0 +# MIPS32R6-NEXT: R_MIPS_HI16 symbol +# MIPS32R6-NEXT: 25 8c 00 00 addiu $12, $12, 0 +# MIPS32R6-NEXT: R_MIPS_LO16 symbol +# MIPS32R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +# MIPSN32: 3c 0c 00 00 lui $12, 0 +# MIPSN32-NEXT: R_MIPS_HI16 symbol +# MIPSN32-NEXT: c1 8c 00 00 ll $12, 0($12) +# MIPSN32-NEXT: R_MIPS_LO16 symbol + +# MIPSN32R6: 3c 0c 00 00 aui $12, $zero, 0 +# MIPSN32R6-NEXT: R_MIPS_HI16 symbol +# MIPSN32R6-NEXT: 25 8c 00 00 addiu $12, $12, 0 +# MIPSN32R6-NEXT: R_MIPS_LO16 symbol +# MIPSN32R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +# MIPS64: 3c 0c 00 00 lui $12, 0 +# MIPS64-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 0c 64 38 dsll $12, $12, 16 +# MIPS64-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 0c 64 38 dsll $12, $12, 16 +# MIPS64-NEXT: c1 8c 00 00 ll $12, 0($12) +# MIPS64-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE 
symbol + +# MIPS64R6: 3c 0c 00 00 aui $12, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 3c 01 00 00 aui $1, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 0c 60 3c dsll32 $12, $12, 0 +# MIPS64R6-NEXT: 01 81 60 2d daddu $12, $12, $1 +# MIPS64R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +ll $12, symbol($3) +# MIPS32: 3c 0c 00 00 lui $12, 0 +# MIPS32-NEXT: R_MIPS_HI16 symbol +# MIPS32-NEXT: 01 83 60 21 addu $12, $12, $3 +# MIPS32-NEXT: c1 8c 00 00 ll $12, 0($12) +# MIPS32-NEXT: R_MIPS_LO16 symbol + +# MIPS32R6: 3c 0c 00 00 aui $12, $zero, 0 +# MIPS32R6-NEXT: R_MIPS_HI16 symbol +# MIPS32R6-NEXT: 25 8c 00 00 addiu $12, $12, 0 +# MIPS32R6-NEXT: R_MIPS_LO16 symbol +# MIPS32R6-NEXT: 01 83 60 21 addu $12, $12, $3 +# MIPS32R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +# MIPSN32: 3c 0c 00 00 lui $12, 0 +# MIPSN32-NEXT: R_MIPS_HI16 symbol +# MIPSN32-NEXT: 01 83 60 21 addu $12, $12, $3 +# MIPSN32-NEXT: c1 8c 00 00 ll $12, 0($12) +# MIPSN32-NEXT: R_MIPS_LO16 symbol + +# MIPSN32R6: 3c 0c 00 00 aui $12, $zero, 0 +# MIPSN32R6-NEXT: R_MIPS_HI16 symbol +# MIPSN32R6-NEXT: 25 8c 00 00 addiu $12, $12, 0 +# MIPSN32R6-NEXT: R_MIPS_LO16 symbol +# MIPSN32R6-NEXT: 01 83 60 21 addu $12, $12, $3 +# MIPSN32R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +# MIPS64: 3c 0c 00 00 lui $12, 0 +# MIPS64-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 0c 64 38 dsll $12, $12, 16 +# MIPS64-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 0c 64 38 dsll $12, $12, 16 +# MIPS64-NEXT: 01 83 60 2d daddu $12, $12, $3 +# MIPS64-NEXT: c1 
8c 00 00 ll $12, 0($12) +# MIPS64-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol + +# MIPS64R6: 3c 0c 00 00 aui $12, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 3c 01 00 00 aui $1, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 0c 60 3c dsll32 $12, $12, 0 +# MIPS64R6-NEXT: 01 81 60 2d daddu $12, $12, $1 +# MIPS64R6-NEXT: 01 83 60 2d daddu $12, $12, $3 +# MIPS64R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +ll $12, symbol+8 +# MIPS32: 3c 0c 00 00 lui $12, 0 +# MIPS32-NEXT: R_MIPS_HI16 symbol +# MIPS32-NEXT: c1 8c 00 08 ll $12, 8($12) +# MIPS32-NEXT: R_MIPS_LO16 symbol + +# MIPS32R6: 3c 0c 00 00 aui $12, $zero, 0 +# MIPS32R6-NEXT: R_MIPS_HI16 symbol +# MIPS32R6-NEXT: 25 8c 00 08 addiu $12, $12, 8 +# MIPS32R6-NEXT: R_MIPS_LO16 symbol +# MIPS32R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +# MIPSN32: 3c 0c 00 00 lui $12, 0 +# MIPSN32-NEXT: R_MIPS_HI16 symbol+0x8 +# MIPSN32-NEXT: c1 8c 00 00 ll $12, 0($12) +# MIPSN32-NEXT: R_MIPS_LO16 symbol+0x8 + +# MIPSN32R6: 3c 0c 00 00 aui $12, $zero, 0 +# MIPSN32R6-NEXT: R_MIPS_HI16 symbol+0x8 +# MIPSN32R6-NEXT: 25 8c 00 00 addiu $12, $12, 0 +# MIPSN32R6-NEXT: R_MIPS_LO16 symbol+0x8 +# MIPSN32R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +# MIPS64: 3c 0c 00 00 lui $12, 0 +# MIPS64-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64-NEXT: 00 0c 64 38 dsll $12, $12, 16 +# MIPS64-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64-NEXT: 00 0c 64 38 dsll $12, $12, 16 +# MIPS64-NEXT: c1 8c 00 00 ll $12, 0($12) +# MIPS64-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 
+ +# MIPS64R6: 3c 0c 00 00 aui $12, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 3c 01 00 00 aui $1, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 00 0c 60 3c dsll32 $12, $12, 0 +# MIPS64R6-NEXT: 01 81 60 2d daddu $12, $12, $1 +# MIPS64R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +.option pic2 + +ll $12, symbol +# MIPS32: 8f 8c 00 00 lw $12, 0($gp) +# MIPS32-NEXT: R_MIPS_GOT16 symbol +# MIPS32-NEXT: c1 8c 00 00 ll $12, 0($12) + +# MIPS32R6: 8f 8c 00 00 lw $12, 0($gp) +# MIPS32R6-NEXT: R_MIPS_GOT16 symbol +# MIPS32R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +# MIPSN32: 8f 8c 00 00 lw $12, 0($gp) +# MIPSN32-NEXT: R_MIPS_GOT_DISP symbol +# MIPSN32-NEXT: c1 8c 00 00 ll $12, 0($12) + +# MIPSN32R6: 8f 8c 00 00 lw $12, 0($gp) +# MIPSN32R6-NEXT: R_MIPS_GOT_DISP symbol +# MIPSN32R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +# MIPS64: df 8c 00 00 ld $12, 0($gp) +# MIPS64-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: c1 8c 00 00 ll $12, 0($12) + +# MIPS64R6: df 8c 00 00 ld $12, 0($gp) +# MIPS64R6-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +ll $12, symbol+8 +# MIPS32: 8f 8c 00 00 lw $12, 0($gp) +# MIPS32-NEXT: R_MIPS_GOT16 symbol +# MIPS32-NEXT: c1 8c 00 08 ll $12, 8($12) + +# MIPS32R6: 8f 8c 00 00 lw $12, 0($gp) +# MIPS32R6-NEXT: R_MIPS_GOT16 symbol +# MIPS32R6-NEXT: 25 8c 00 08 addiu $12, $12, 8 +# MIPS32R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +# MIPSN32: 8f 8c 00 00 lw $12, 0($gp) +# MIPSN32-NEXT: R_MIPS_GOT_DISP symbol +# MIPSN32-NEXT: c1 8c 00 08 ll $12, 8($12) + +# MIPSN32R6: 8f 8c 00 00 lw $12, 0($gp) +# MIPSN32R6-NEXT: R_MIPS_GOT_DISP symbol +# MIPSN32R6-NEXT: 25 8c 00 08 addiu $12, $12, 8 +# 
MIPSN32R6-NEXT: 7d 8c 00 36 ll $12, 0($12) + +# MIPS64: df 8c 00 00 ld $12, 0($gp) +# MIPS64-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: c1 8c 00 08 ll $12, 8($12) + +# MIPS64R6: df 8c 00 00 ld $12, 0($gp) +# MIPS64R6-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 65 8c 00 08 daddiu $12, $12, 8 +# MIPS64R6-NEXT: 7d 8c 00 36 ll $12, 0($12) diff --git a/llvm/test/MC/Mips/lld-expansion.s b/llvm/test/MC/Mips/lld-expansion.s new file mode 100644 index 0000000000000..48755d59a2400 --- /dev/null +++ b/llvm/test/MC/Mips/lld-expansion.s @@ -0,0 +1,188 @@ +# RUN: llvm-mc -filetype=obj -triple mips64 -mcpu=mips64 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS64 +# RUN: llvm-mc -filetype=obj -triple mips64 -mcpu=mips64r6 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS64R6 + +lld $2, 128($sp) +# MIPS64: d3 a2 00 80 lld $2, 128($sp) +# MIPS64R6: 7f a2 40 37 lld $2, 128($sp) + +lld $2, -128($sp) +# MIPS64: d3 a2 ff 80 lld $2, -128($sp) +# MIPS64R6: 7f a2 c0 37 lld $2, -128($sp) + +lld $2, 256($sp) +# MIPS64: d3 a2 01 00 lld $2, 256($sp) + +# MIPS64R6: 67 a2 01 00 daddiu $2, $sp, 256 +# MIPS64R6-NEXT: 7c 42 00 37 lld $2, 0($2) + +lld $2, -257($sp) +# MIPS64: d3 a2 fe ff lld $2, -257($sp) + +# MIPS64R6: 67 a2 fe ff daddiu $2, $sp, -257 +# MIPS64R6-NEXT: 7c 42 00 37 lld $2, 0($2) + +lld $2, 32767($sp) +# MIPS64: d3 a2 7f ff lld $2, 32767($sp) + +# MIPS64R6: 67 a2 7f ff daddiu $2, $sp, 32767 +# MIPS64R6-NEXT: 7c 42 00 37 lld $2, 0($2) + +lld $2, 32768($sp) +# MIPS64: 3c 02 00 01 lui $2, 1 +# MIPS64-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64-NEXT: d0 42 80 00 lld $2, -32768($2) + +# MIPS64R6: 34 02 80 00 ori $2, $zero, 32768 +# MIPS64R6-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64R6-NEXT: 7c 42 00 37 lld $2, 0($2) + +lld $2, -32768($sp) +# MIPS64: d3 a2 80 00 lld $2, -32768($sp) + +# MIPS64R6: 67 a2 80 00 daddiu $2, $sp, -32768 +# MIPS64R6-NEXT: 7c 42 00 37 lld $2, 0($2) + 
+lld $2, -32769($sp) +# MIPS64: 3c 02 ff ff lui $2, 65535 +# MIPS64-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64-NEXT: d0 42 7f ff lld $2, 32767($2) + +# MIPS64R6: 3c 02 ff ff aui $2, $zero, 65535 +# MIPS64R6-NEXT: 34 42 7f ff ori $2, $2, 32767 +# MIPS64R6-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64R6-NEXT: 7c 42 00 37 lld $2, 0($2) + +lld $2, 2147483648($sp) +# MIPS64: 34 02 80 00 ori $2, $zero, 32768 +# MIPS64-NEXT: 00 02 14 38 dsll $2, $2, 16 +# MIPS64-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64-NEXT: d0 42 00 00 lld $2, 0($2) + +# MIPS64R6: 34 02 80 00 ori $2, $zero, 32768 +# MIPS64R6-NEXT: 00 02 14 38 dsll $2, $2, 16 +# MIPS64R6-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64R6-NEXT: 7c 42 00 37 lld $2, 0($2) + +lld $2, -2147483648($sp) +# MIPS64: 3c 02 80 00 lui $2, 32768 +# MIPS64-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64-NEXT: d0 42 00 00 lld $2, 0($2) + +# MIPS64R6: 3c 02 80 00 aui $2, $zero, 32768 +# MIPS64R6-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64R6-NEXT: 7c 42 00 37 lld $2, 0($2) + +lld $2, 9223372036853775808($sp) +# MIPS64: 3c 02 7f ff lui $2, 32767 +# MIPS64-NEXT: 34 42 ff ff ori $2, $2, 65535 +# MIPS64-NEXT: 00 02 14 38 dsll $2, $2, 16 +# MIPS64-NEXT: 34 42 ff f1 ori $2, $2, 65521 +# MIPS64-NEXT: 00 02 14 38 dsll $2, $2, 16 +# MIPS64-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64-NEXT: d0 42 bd c0 lld $2, -16960($2) + +# MIPS64R6: 3c 02 7f ff aui $2, $zero, 32767 +# MIPS64R6-NEXT: 34 42 ff ff ori $2, $2, 65535 +# MIPS64R6-NEXT: 00 02 14 38 dsll $2, $2, 16 +# MIPS64R6-NEXT: 34 42 ff f0 ori $2, $2, 65520 +# MIPS64R6-NEXT: 00 02 14 38 dsll $2, $2, 16 +# MIPS64R6-NEXT: 34 42 bd c0 ori $2, $2, 48576 +# MIPS64R6-NEXT: 00 5d 10 2d daddu $2, $2, $sp +# MIPS64R6-NEXT: 7c 42 00 37 lld $2, 0($2) + +lld $12, symbol +# MIPS64: 3c 0c 00 00 lui $12, 0 +# MIPS64-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 0c 64 38 dsll 
$12, $12, 16 +# MIPS64-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 0c 64 38 dsll $12, $12, 16 +# MIPS64-NEXT: d1 8c 00 00 lld $12, 0($12) +# MIPS64-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol + +# MIPS64R6: 3c 0c 00 00 aui $12, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 3c 01 00 00 aui $1, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 0c 60 3c dsll32 $12, $12, 0 +# MIPS64R6-NEXT: 01 81 60 2d daddu $12, $12, $1 +# MIPS64R6-NEXT: 7d 8c 00 37 lld $12, 0($12) + +lld $12, symbol($3) +# MIPS64: 3c 0c 00 00 lui $12, 0 +# MIPS64-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 0c 64 38 dsll $12, $12, 16 +# MIPS64-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 0c 64 38 dsll $12, $12, 16 +# MIPS64-NEXT: 01 83 60 2d daddu $12, $12, $3 +# MIPS64-NEXT: d1 8c 00 00 lld $12, 0($12) +# MIPS64-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol + +# MIPS64R6-NEXT: 3c 0c 00 00 aui $12, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 3c 01 00 00 aui $1, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 0c 60 3c dsll32 $12, $12, 0 +# MIPS64R6-NEXT: 01 81 60 2d daddu $12, $12, $1 +# MIPS64R6-NEXT: 01 83 60 2d daddu 
$12, $12, $3 +# MIPS64R6-NEXT: 7d 8c 00 37 lld $12, 0($12) + +lld $12, symbol+8 +# MIPS64: 3c 0c 00 00 lui $12, 0 +# MIPS64-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64-NEXT: 00 0c 64 38 dsll $12, $12, 16 +# MIPS64-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64-NEXT: 00 0c 64 38 dsll $12, $12, 16 +# MIPS64-NEXT: d1 8c 00 00 lld $12, 0($12) +# MIPS64-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 + +# MIPS64R6-NEXT: 3c 0c 00 00 aui $12, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 3c 01 00 00 aui $1, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 65 8c 00 00 daddiu $12, $12, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 00 0c 60 3c dsll32 $12, $12, 0 +# MIPS64R6-NEXT: 01 81 60 2d daddu $12, $12, $1 +# MIPS64R6-NEXT: 7d 8c 00 37 lld $12, 0($12) + +.option pic2 + +lld $12, symbol +# MIPS64: df 8c 00 00 ld $12, 0($gp) +# MIPS64-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: d1 8c 00 00 lld $12, 0($12) + +# MIPS64R6: df 8c 00 00 ld $12, 0($gp) +# MIPS64R6-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 7d 8c 00 37 lld $12, 0($12) + +lld $12, symbol+8 +# MIPS64: df 8c 00 00 ld $12, 0($gp) +# MIPS64-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: d1 8c 00 08 lld $12, 8($12) + +# MIPS64R6: df 8c 00 00 ld $12, 0($gp) +# MIPS64R6-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 65 8c 00 08 daddiu $12, $12, 8 +# MIPS64R6-NEXT: 7d 8c 00 37 lld $12, 0($12) diff --git a/llvm/test/MC/Mips/sc-expansion.s b/llvm/test/MC/Mips/sc-expansion.s index 
76b30f174f9e2..b407f7aaf5700 100644 --- a/llvm/test/MC/Mips/sc-expansion.s +++ b/llvm/test/MC/Mips/sc-expansion.s @@ -1,48 +1,406 @@ # RUN: llvm-mc -filetype=obj -triple mips -mcpu=mips2 %s -o - \ -# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS32 # RUN: llvm-mc -filetype=obj -triple mips -mcpu=mips32 %s -o - \ -# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS32 # RUN: llvm-mc -filetype=obj -triple mips -mcpu=mips32r2 %s -o - \ -# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS -# RUN: llvm-mc -filetype=obj -triple mips -mcpu=mips3 %s -o - \ -# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS -# RUN: llvm-mc -filetype=obj -triple mips -mcpu=mips64 %s -o - \ -# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS -# RUN: llvm-mc -filetype=obj -triple mips -mcpu=mips64r2 %s -o - \ -# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS32 +# RUN: llvm-mc -filetype=obj -triple mipsn32 -mcpu=mips3 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPSN32 +# RUN: llvm-mc -filetype=obj -triple mipsn32 -mcpu=mips64r6 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPSN32R6 +# RUN: llvm-mc -filetype=obj -triple mips64 -mcpu=mips64 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS64 +# RUN: llvm-mc -filetype=obj -triple mips64 -mcpu=mips64r2 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS64 # RUN: llvm-mc -filetype=obj -triple mips -mcpu=mips32r6 %s -o - \ -# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPSR6 -# RUN: llvm-mc -filetype=obj -triple mips -mcpu=mips64r6 %s -o - \ -# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPSR6 - -# MIPS: e0 6c 00 00 sc $12, 0($3) -# MIPSR6: 
7c 6c 00 26 sc $12, 0($3) -sc $12, 0($3) - -# MIPS: e0 6c 00 04 sc $12, 4($3) -# MIPSR6: 7c 6c 02 26 sc $12, 4($3) -sc $12, 4($3) - -# MIPS: 3c 01 00 00 lui $1, 0 -# MIPS: R_MIPS_HI16 symbol -# MIPS: e0 2c 00 00 sc $12, 0($1) -# MIPS: R_MIPS_LO16 symbol - -# MIPSR6: 3c 01 00 00 aui $1, $zero, 0 -# MIPSR6: R_MIPS_HI16 symbol -# MIPSR6: 24 21 00 00 addiu $1, $1, 0 -# MIPSR6: R_MIPS_LO16 symbol -# MIPSR6: 7c 2c 00 26 sc $12, 0($1) +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS32R6 +# RUN: llvm-mc -filetype=obj -triple mips64 -mcpu=mips64r6 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS64R6 + +sc $2, 128($sp) +# MIPS32: e3 a2 00 80 sc $2, 128($sp) +# MIPS32R6: 7f a2 40 26 sc $2, 128($sp) +# MIPSN32: e3 a2 00 80 sc $2, 128($sp) +# MIPSN32R6: 7f a2 40 26 sc $2, 128($sp) +# MIPS64: e3 a2 00 80 sc $2, 128($sp) +# MIPS64R6: 7f a2 40 26 sc $2, 128($sp) + +sc $2, -128($sp) +# MIPS32: e3 a2 ff 80 sc $2, -128($sp) +# MIPS32R6: 7f a2 c0 26 sc $2, -128($sp) +# MIPSN32: e3 a2 ff 80 sc $2, -128($sp) +# MIPSN32R6: 7f a2 c0 26 sc $2, -128($sp) +# MIPS64: e3 a2 ff 80 sc $2, -128($sp) +# MIPS64R6: 7f a2 c0 26 sc $2, -128($sp) + +sc $2, 256($sp) +# MIPS32: e3 a2 01 00 sc $2, 256($sp) + +# MIPS32R6: 27 a1 01 00 addiu $1, $sp, 256 +# MIPS32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPSN32: e3 a2 01 00 sc $2, 256($sp) + +# MIPSN32R6: 27 a1 01 00 addiu $1, $sp, 256 +# MIPSN32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPS64: e3 a2 01 00 sc $2, 256($sp) + +# MIPS64R6: 67 a1 01 00 daddiu $1, $sp, 256 +# MIPS64R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +sc $2, -257($sp) +# MIPS32: e3 a2 fe ff sc $2, -257($sp) + +# MIPS32R6: 27 a1 fe ff addiu $1, $sp, -257 +# MIPS32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPSN32: e3 a2 fe ff sc $2, -257($sp) + +# MIPSN32R6: 27 a1 fe ff addiu $1, $sp, -257 +# MIPSN32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPS64: e3 a2 fe ff sc $2, -257($sp) + +# MIPS64R6: 67 a1 fe ff daddiu $1, $sp, -257 +# MIPS64R6-NEXT: 7c 22 00 26 sc $2, 0($1) 
+ +sc $2, 32767($sp) +# MIPS32: e3 a2 7f ff sc $2, 32767($sp) + +# MIPS32R6: 27 a1 7f ff addiu $1, $sp, 32767 +# MIPS32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPSN32: e3 a2 7f ff sc $2, 32767($sp) + +# MIPSN32R6: 27 a1 7f ff addiu $1, $sp, 32767 +# MIPSN32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPS64: e3 a2 7f ff sc $2, 32767($sp) + +# MIPS64R6: 67 a1 7f ff daddiu $1, $sp, 32767 +# MIPS64R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +sc $2, 32768($sp) +# MIPS32: 3c 01 00 01 lui $1, 1 +# MIPS32-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPS32-NEXT: e0 22 80 00 sc $2, -32768($1) + +# MIPS32R6: 34 01 80 00 ori $1, $zero, 32768 +# MIPS32R6-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPS32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPSN32: 3c 01 00 01 lui $1, 1 +# MIPSN32-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPSN32-NEXT: e0 22 80 00 sc $2, -32768($1) + +# MIPSN32R6: 34 01 80 00 ori $1, $zero, 32768 +# MIPSN32R6-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPSN32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPS64: 3c 01 00 01 lui $1, 1 +# MIPS64-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64-NEXT: e0 22 80 00 sc $2, -32768($1) + +# MIPS64R6: 34 01 80 00 ori $1, $zero, 32768 +# MIPS64R6-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +sc $2, -32768($sp) +# MIPS32: e3 a2 80 00 sc $2, -32768($sp) + +# MIPS32R6: 27 a1 80 00 addiu $1, $sp, -32768 +# MIPS32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPSN32: e3 a2 80 00 sc $2, -32768($sp) + +# MIPSN32R6: 27 a1 80 00 addiu $1, $sp, -32768 +# MIPSN32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPS64: e3 a2 80 00 sc $2, -32768($sp) + +# MIPS64R6: 67 a1 80 00 daddiu $1, $sp, -32768 +# MIPS64R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +sc $2, -32769($sp) +# MIPS32: 3c 01 ff ff lui $1, 65535 +# MIPS32-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPS32-NEXT: e0 22 7f ff sc $2, 32767($1) + +# MIPS32R6: 3c 01 ff ff aui $1, $zero, 65535 +# MIPS32R6-NEXT: 34 21 7f ff ori $1, $1, 32767 +# MIPS32R6-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPS32R6-NEXT: 7c 22 00 26 
sc $2, 0($1) + +# MIPSN32: 3c 01 ff ff lui $1, 65535 +# MIPSN32-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPSN32-NEXT: e0 22 7f ff sc $2, 32767($1) + +# MIPSN32R6: 3c 01 ff ff aui $1, $zero, 65535 +# MIPSN32R6-NEXT: 34 21 7f ff ori $1, $1, 32767 +# MIPSN32R6-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPSN32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPS64: 3c 01 ff ff lui $1, 65535 +# MIPS64-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64-NEXT: e0 22 7f ff sc $2, 32767($1) + +# MIPS64R6: 3c 01 ff ff aui $1, $zero, 65535 +# MIPS64R6-NEXT: 34 21 7f ff ori $1, $1, 32767 +# MIPS64R6-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +sc $2, 655987($sp) +# MIPS32: 3c 01 00 0a lui $1, 10 +# MIPS32-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPS32-NEXT: e0 22 02 73 sc $2, 627($1) + +# MIPS32R6: 3c 01 00 0a aui $1, $zero, 10 +# MIPS32R6-NEXT: 34 21 02 73 ori $1, $1, 627 +# MIPS32R6-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPS32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPSN32: 3c 01 00 0a lui $1, 10 +# MIPSN32-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPSN32-NEXT: e0 22 02 73 sc $2, 627($1) + +# MIPSN32R6: 3c 01 00 0a aui $1, $zero, 10 +# MIPSN32R6-NEXT: 34 21 02 73 ori $1, $1, 627 +# MIPSN32R6-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPSN32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPS64: 3c 01 00 0a lui $1, 10 +# MIPS64-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64-NEXT: e0 22 02 73 sc $2, 627($1) + +# MIPS64R6: 3c 01 00 0a aui $1, $zero, 10 +# MIPS64R6-NEXT: 34 21 02 73 ori $1, $1, 627 +# MIPS64R6-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +sc $2, -655987($sp) +# MIPS32: 3c 01 ff f6 lui $1, 65526 +# MIPS32-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPS32-NEXT: e0 22 fd 8d sc $2, -627($1) + +# MIPS32R6: 3c 01 ff f5 aui $1, $zero, 65525 +# MIPS32R6-NEXT: 34 21 fd 8d ori $1, $1, 64909 +# MIPS32R6-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPS32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPSN32: 3c 01 ff f6 lui $1, 65526 +# MIPSN32-NEXT: 00 3d 08 21 
addu $1, $1, $sp +# MIPSN32-NEXT: e0 22 fd 8d sc $2, -627($1) + +# MIPSN32R6: 3c 01 ff f5 aui $1, $zero, 65525 +# MIPSN32R6-NEXT: 34 21 fd 8d ori $1, $1, 64909 +# MIPSN32R6-NEXT: 00 3d 08 21 addu $1, $1, $sp +# MIPSN32R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +# MIPS64: 3c 01 ff f6 lui $1, 65526 +# MIPS64-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64-NEXT: e0 22 fd 8d sc $2, -627($1) + +# MIPS64R6: 3c 01 ff f5 aui $1, $zero, 65525 +# MIPS64R6-NEXT: 34 21 fd 8d ori $1, $1, 64909 +# MIPS64R6-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64R6-NEXT: 7c 22 00 26 sc $2, 0($1) + +sc $12, symbol +# MIPS32: 3c 01 00 00 lui $1, 0 +# MIPS32-NEXT: R_MIPS_HI16 symbol +# MIPS32-NEXT: e0 2c 00 00 sc $12, 0($1) +# MIPS32-NEXT: R_MIPS_LO16 symbol + +# MIPS32R6: 3c 01 00 00 aui $1, $zero, 0 +# MIPS32R6-NEXT: R_MIPS_HI16 symbol +# MIPS32R6-NEXT: 24 21 00 00 addiu $1, $1, 0 +# MIPS32R6-NEXT: R_MIPS_LO16 symbol +# MIPS32R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +# MIPSN32: 3c 01 00 00 lui $1, 0 +# MIPSN32-NEXT: R_MIPS_HI16 symbol +# MIPSN32-NEXT: e0 2c 00 00 sc $12, 0($1) +# MIPSN32-NEXT: R_MIPS_LO16 symbol + +# MIPSN32R6: 3c 01 00 00 aui $1, $zero, 0 +# MIPSN32R6-NEXT: R_MIPS_HI16 symbol +# MIPSN32R6-NEXT: 24 21 00 00 addiu $1, $1, 0 +# MIPSN32R6-NEXT: R_MIPS_LO16 symbol +# MIPSN32R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +# MIPS64: 3c 01 00 00 lui $1, 0 +# MIPS64-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: e0 2c 00 00 sc $12, 0($1) +# MIPS64-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol + +# MIPS64R6: 3c 01 00 00 aui $1, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: 
R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +sc $12, symbol($3) +# MIPS32: 3c 01 00 00 lui $1, 0 +# MIPS32-NEXT: R_MIPS_HI16 symbol +# MIPS32-NEXT: 00 23 08 21 addu $1, $1, $3 +# MIPS32-NEXT: e0 2c 00 00 sc $12, 0($1) +# MIPS32-NEXT: R_MIPS_LO16 symbol + +# MIPS32R6: 3c 01 00 00 aui $1, $zero, 0 +# MIPS32R6-NEXT: R_MIPS_HI16 symbol +# MIPS32R6-NEXT: 24 21 00 00 addiu $1, $1, 0 +# MIPS32R6-NEXT: R_MIPS_LO16 symbol +# MIPS32R6-NEXT: 00 23 08 21 addu $1, $1, $3 +# MIPS32R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +# MIPSN32: 3c 01 00 00 lui $1, 0 +# MIPSN32-NEXT: R_MIPS_HI16 symbol +# MIPSN32-NEXT: 00 23 08 21 addu $1, $1, $3 +# MIPSN32-NEXT: e0 2c 00 00 sc $12, 0($1) +# MIPSN32-NEXT: R_MIPS_LO16 symbol + +# MIPSN32R6: 3c 01 00 00 aui $1, $zero, 0 +# MIPSN32R6-NEXT: R_MIPS_HI16 symbol +# MIPSN32R6-NEXT: 24 21 00 00 addiu $1, $1, 0 +# MIPSN32R6-NEXT: R_MIPS_LO16 symbol +# MIPSN32R6-NEXT: 00 23 08 21 addu $1, $1, $3 +# MIPSN32R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +# MIPS64: 3c 01 00 00 lui $1, 0 +# MIPS64-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: 00 23 08 2d daddu $1, $1, $3 +# MIPS64-NEXT: e0 2c 00 00 sc $12, 0($1) +# MIPS64-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol + +# MIPS64R6: 3c 01 00 00 aui $1, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 64 21 00 00 
daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 23 08 2d daddu $1, $1, $3 +# MIPS64R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +sc $12, symbol+8 +# MIPS32: 3c 01 00 00 lui $1, 0 +# MIPS32-NEXT: R_MIPS_HI16 symbol +# MIPS32-NEXT: e0 2c 00 08 sc $12, 8($1) +# MIPS32-NEXT: R_MIPS_LO16 symbol + +# MIPS32R6: 3c 01 00 00 aui $1, $zero, 0 +# MIPS32R6-NEXT: R_MIPS_HI16 symbol +# MIPS32R6-NEXT: 24 21 00 08 addiu $1, $1, 8 +# MIPS32R6-NEXT: R_MIPS_LO16 symbol +# MIPS32R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +# MIPSN32: 3c 01 00 00 lui $1, 0 +# MIPSN32-NEXT: R_MIPS_HI16 symbol+0x8 +# MIPSN32-NEXT: e0 2c 00 00 sc $12, 0($1) +# MIPSN32-NEXT: R_MIPS_LO16 symbol+0x8 + +# MIPSN32R6: 3c 01 00 00 aui $1, $zero, 0 +# MIPSN32R6-NEXT: R_MIPS_HI16 symbol+0x8 +# MIPSN32R6-NEXT: 24 21 00 00 addiu $1, $1, 0 +# MIPSN32R6-NEXT: R_MIPS_LO16 symbol+0x8 +# MIPSN32R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +# MIPS64: 3c 01 00 00 lui $1, 0 +# MIPS64-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: e0 2c 00 00 sc $12, 0($1) +# MIPS64-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 + +# MIPS64R6: 3c 01 00 00 aui $1, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 00 01 0c 38 
dsll $1, $1, 16 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +.option pic2 + sc $12, symbol +# MIPS32: 8f 81 00 00 lw $1, 0($gp) +# MIPS32-NEXT: R_MIPS_GOT16 symbol +# MIPS32-NEXT: e0 2c 00 00 sc $12, 0($1) + +# MIPS32R6: 8f 81 00 00 lw $1, 0($gp) +# MIPS32R6-NEXT: R_MIPS_GOT16 symbol +# MIPS32R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +# MIPSN32: 8f 81 00 00 lw $1, 0($gp) +# MIPSN32-NEXT: R_MIPS_GOT_DISP symbol +# MIPSN32-NEXT: e0 2c 00 00 sc $12, 0($1) + +# MIPSN32R6: 8f 81 00 00 lw $1, 0($gp) +# MIPSN32R6-NEXT: R_MIPS_GOT_DISP symbol +# MIPSN32R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +# MIPS64: df 81 00 00 ld $1, 0($gp) +# MIPS64-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: e0 2c 00 00 sc $12, 0($1) + +# MIPS64R6: df 81 00 00 ld $1, 0($gp) +# MIPS64R6-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +sc $12, symbol+8 +# MIPS32: 8f 81 00 00 lw $1, 0($gp) +# MIPS32-NEXT: R_MIPS_GOT16 symbol +# MIPS32-NEXT: e0 2c 00 08 sc $12, 8($1) + +# MIPS32R6: 8f 81 00 00 lw $1, 0($gp) +# MIPS32R6-NEXT: R_MIPS_GOT16 symbol +# MIPS32R6-NEXT: 24 21 00 08 addiu $1, $1, 8 +# MIPS32R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +# MIPSN32: 8f 81 00 00 lw $1, 0($gp) +# MIPSN32-NEXT: R_MIPS_GOT_DISP symbol +# MIPSN32-NEXT: e0 2c 00 08 sc $12, 8($1) + +# MIPSN32R6: 8f 81 00 00 lw $1, 0($gp) +# MIPSN32R6-NEXT: R_MIPS_GOT_DISP symbol +# MIPSN32R6-NEXT: 24 21 00 08 addiu $1, $1, 8 +# MIPSN32R6-NEXT: 7c 2c 00 26 sc $12, 0($1) + +# MIPS64: df 81 00 00 ld $1, 0($gp) +# MIPS64-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: e0 2c 00 08 sc $12, 8($1) -# MIPS: 3c 01 00 00 lui $1, 0 -# MIPS: R_MIPS_HI16 symbol -# MIPS: e0 2c 00 08 sc $12, 8($1) -# MIPS: 
R_MIPS_LO16 symbol - -# MIPSR6: 3c 01 00 00 aui $1, $zero, 0 -# MIPSR6: R_MIPS_HI16 symbol -# MIPSR6: 24 21 00 08 addiu $1, $1, 8 -# MIPSR6: R_MIPS_LO16 symbol -# MIPSR6: 7c 2c 00 26 sc $12, 0($1) -sc $12, symbol + 8 +# MIPS64R6: df 81 00 00 ld $1, 0($gp) +# MIPS64R6-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 64 21 00 08 daddiu $1, $1, 8 +# MIPS64R6-NEXT: 7c 2c 00 26 sc $12, 0($1) diff --git a/llvm/test/MC/Mips/scd-expansion.s b/llvm/test/MC/Mips/scd-expansion.s new file mode 100644 index 0000000000000..54a3baa5d68fe --- /dev/null +++ b/llvm/test/MC/Mips/scd-expansion.s @@ -0,0 +1,188 @@ +# RUN: llvm-mc -filetype=obj -triple mips64 -mcpu=mips64 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS64 +# RUN: llvm-mc -filetype=obj -triple mips64 -mcpu=mips64r6 %s -o - \ +# RUN: | llvm-objdump -d -r - | FileCheck %s --check-prefix=MIPS64R6 + +scd $2, 128($sp) +# MIPS64: f3 a2 00 80 scd $2, 128($sp) +# MIPS64R6: 7f a2 40 27 scd $2, 128($sp) + +scd $2, -128($sp) +# MIPS64: f3 a2 ff 80 scd $2, -128($sp) +# MIPS64R6: 7f a2 c0 27 scd $2, -128($sp) + +scd $2, 256($sp) +# MIPS64: f3 a2 01 00 scd $2, 256($sp) + +# MIPS64R6: 67 a1 01 00 daddiu $1, $sp, 256 +# MIPS64R6-NEXT: 7c 22 00 27 scd $2, 0($1) + +scd $2, -257($sp) +# MIPS64: f3 a2 fe ff scd $2, -257($sp) + +# MIPS64R6: 67 a1 fe ff daddiu $1, $sp, -257 +# MIPS64R6-NEXT: 7c 22 00 27 scd $2, 0($1) + +scd $2, 32767($sp) +# MIPS64: f3 a2 7f ff scd $2, 32767($sp) + +# MIPS64R6: 67 a1 7f ff daddiu $1, $sp, 32767 +# MIPS64R6-NEXT: 7c 22 00 27 scd $2, 0($1) + +scd $2, 32768($sp) +# MIPS64: 3c 01 00 01 lui $1, 1 +# MIPS64-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64-NEXT: f0 22 80 00 scd $2, -32768($1) + +# MIPS64R6: 34 01 80 00 ori $1, $zero, 32768 +# MIPS64R6-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64R6-NEXT: 7c 22 00 27 scd $2, 0($1) + +scd $2, -32768($sp) +# MIPS64: f3 a2 80 00 scd $2, -32768($sp) + +# MIPS64R6: 67 a1 80 00 daddiu $1, $sp, -32768 +# MIPS64R6-NEXT: 7c 22 00 
27 scd $2, 0($1) + +scd $2, -32769($sp) +# MIPS64: 3c 01 ff ff lui $1, 65535 +# MIPS64-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64-NEXT: f0 22 7f ff scd $2, 32767($1) + +# MIPS64R6: 3c 01 ff ff aui $1, $zero, 65535 +# MIPS64R6-NEXT: 34 21 7f ff ori $1, $1, 32767 +# MIPS64R6-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64R6-NEXT: 7c 22 00 27 scd $2, 0($1) + +scd $2, 2147483648($sp) +# MIPS64: 34 01 80 00 ori $1, $zero, 32768 +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64-NEXT: f0 22 00 00 scd $2, 0($1) + +# MIPS64R6: 34 01 80 00 ori $1, $zero, 32768 +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64R6-NEXT: 7c 22 00 27 scd $2, 0($1) + +scd $2, -2147483648($sp) +# MIPS64: 3c 01 80 00 lui $1, 32768 +# MIPS64-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64-NEXT: f0 22 00 00 scd $2, 0($1) + +# MIPS64R6: 3c 01 80 00 aui $1, $zero, 32768 +# MIPS64R6-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64R6-NEXT: 7c 22 00 27 scd $2, 0($1) + +scd $2, 9223372036853775808($sp) +# MIPS64: 3c 01 7f ff lui $1, 32767 +# MIPS64-NEXT: 34 21 ff ff ori $1, $1, 65535 +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: 34 21 ff f1 ori $1, $1, 65521 +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64-NEXT: f0 22 bd c0 scd $2, -16960($1) + +# MIPS64R6: 3c 01 7f ff aui $1, $zero, 32767 +# MIPS64R6-NEXT: 34 21 ff ff ori $1, $1, 65535 +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 34 21 ff f0 ori $1, $1, 65520 +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 34 21 bd c0 ori $1, $1, 48576 +# MIPS64R6-NEXT: 00 3d 08 2d daddu $1, $1, $sp +# MIPS64R6-NEXT: 7c 22 00 27 scd $2, 0($1) + +scd $12, symbol +# MIPS64: 3c 01 00 00 lui $1, 0 +# MIPS64-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 
01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: f0 2c 00 00 scd $12, 0($1) +# MIPS64-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol + +# MIPS64R6: 3c 01 00 00 aui $1, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 7c 2c 00 27 scd $12, 0($1) + +scd $12, symbol($3) +# MIPS64: 3c 01 00 00 lui $1, 0 +# MIPS64-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: 00 23 08 2d daddu $1, $1, $3 +# MIPS64-NEXT: f0 2c 00 00 scd $12, 0($1) +# MIPS64-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol + +# MIPS64R6: 3c 01 00 00 aui $1, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 00 23 08 2d daddu $1, $1, $3 +# MIPS64R6-NEXT: 
7c 2c 00 27 scd $12, 0($1) + +scd $12, symbol+8 +# MIPS64: 3c 01 00 00 lui $1, 0 +# MIPS64-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64-NEXT: f0 2c 00 00 scd $12, 0($1) +# MIPS64-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 + +# MIPS64R6: 3c 01 00 00 aui $1, $zero, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHEST/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_HIGHER/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_HI16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 00 01 0c 38 dsll $1, $1, 16 +# MIPS64R6-NEXT: 64 21 00 00 daddiu $1, $1, 0 +# MIPS64R6-NEXT: R_MIPS_LO16/R_MIPS_NONE/R_MIPS_NONE symbol+0x8 +# MIPS64R6-NEXT: 7c 2c 00 27 scd $12, 0($1) + +.option pic2 + +scd $12, symbol +# MIPS64: df 81 00 00 ld $1, 0($gp) +# MIPS64-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: f0 2c 00 00 scd $12, 0($1) + +# MIPS64R6: df 81 00 00 ld $1, 0($gp) +# MIPS64R6-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 7c 2c 00 27 scd $12, 0($1) + +scd $12, symbol+8 +# MIPS64: df 81 00 00 ld $1, 0($gp) +# MIPS64-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64-NEXT: f0 2c 00 08 scd $12, 8($1) + +# MIPS64R6: df 81 00 00 ld $1, 0($gp) +# MIPS64R6-NEXT: R_MIPS_GOT_DISP/R_MIPS_NONE/R_MIPS_NONE symbol +# MIPS64R6-NEXT: 64 21 00 08 daddiu $1, $1, 8 +# MIPS64R6-NEXT: 7c 2c 00 27 scd $12, 0($1) From 8d20dd0b0694af6a43b63fd9ebd5c27db653b7dd Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 26 Nov 2019 16:52:18 -0500 Subject: [PATCH 071/591] 
[ConstFolding] move tests for copysign; NFC InstCombine doesn't have any transforms for copysign currently. --- .../test/Analysis/ConstantFolding/copysign.ll | 53 +++++++++++++++++++ llvm/test/Transforms/InstCombine/copysign.ll | 49 ----------------- 2 files changed, 53 insertions(+), 49 deletions(-) create mode 100644 llvm/test/Analysis/ConstantFolding/copysign.ll delete mode 100644 llvm/test/Transforms/InstCombine/copysign.ll diff --git a/llvm/test/Analysis/ConstantFolding/copysign.ll b/llvm/test/Analysis/ConstantFolding/copysign.ll new file mode 100644 index 0000000000000..228ffcb470538 --- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/copysign.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -constprop < %s | FileCheck %s + +declare float @llvm.copysign.f32(float, float) +declare double @llvm.copysign.f64(double, double) + +define float @f32_01() { +; CHECK-LABEL: @f32_01( +; CHECK-NEXT: ret float -1.000000e+00 +; + %x = call float @llvm.copysign.f32(float 1.0, float -2.0) + ret float %x +} + +define float @f32_02() { +; CHECK-LABEL: @f32_02( +; CHECK-NEXT: ret float 2.000000e+00 +; + %x = call float @llvm.copysign.f32(float -2.0, float 1.0) + ret float %x +} + +define float @f32_03() { +; CHECK-LABEL: @f32_03( +; CHECK-NEXT: ret float -2.000000e+00 +; + %x = call float @llvm.copysign.f32(float -2.0, float -1.0) + ret float %x +} + +define double @f64_01() { +; CHECK-LABEL: @f64_01( +; CHECK-NEXT: ret double -1.000000e+00 +; + %x = call double @llvm.copysign.f64(double 1.0, double -2.0) + ret double %x +} + +define double @f64_02() { +; CHECK-LABEL: @f64_02( +; CHECK-NEXT: ret double 1.000000e+00 +; + %x = call double @llvm.copysign.f64(double -1.0, double 2.0) + ret double %x +} + +define double @f64_03() { +; CHECK-LABEL: @f64_03( +; CHECK-NEXT: ret double -1.000000e+00 +; + %x = call double @llvm.copysign.f64(double -1.0, double -2.0) + ret double %x +} diff --git 
a/llvm/test/Transforms/InstCombine/copysign.ll b/llvm/test/Transforms/InstCombine/copysign.ll deleted file mode 100644 index 556b79999b024..0000000000000 --- a/llvm/test/Transforms/InstCombine/copysign.ll +++ /dev/null @@ -1,49 +0,0 @@ -; RUN: opt -S -instcombine < %s | FileCheck %s - -declare float @llvm.copysign.f32(float, float) #0 -declare double @llvm.copysign.f64(double, double) #0 - -; CHECK-LABEL: @constant_fold_copysign_f32_01 -; CHECK-NEXT: ret float -1.000000e+00 -define float @constant_fold_copysign_f32_01() #0 { - %x = call float @llvm.copysign.f32(float 1.0, float -2.0) #0 - ret float %x -} - -; CHECK-LABEL: @constant_fold_copysign_f32_02 -; CHECK-NEXT: ret float 2.000000e+00 -define float @constant_fold_copysign_f32_02() #0 { - %x = call float @llvm.copysign.f32(float -2.0, float 1.0) #0 - ret float %x -} - -; CHECK-LABEL: @constant_fold_copysign_f32_03 -; CHECK-NEXT: ret float -2.000000e+00 -define float @constant_fold_copysign_f32_03() #0 { - %x = call float @llvm.copysign.f32(float -2.0, float -1.0) #0 - ret float %x -} - -; CHECK-LABEL: @constant_fold_copysign_f64_01 -; CHECK-NEXT: ret double -1.000000e+00 -define double @constant_fold_copysign_f64_01() #0 { - %x = call double @llvm.copysign.f64(double 1.0, double -2.0) #0 - ret double %x -} - -; CHECK-LABEL: @constant_fold_copysign_f64_02 -; CHECK-NEXT: ret double 1.000000e+00 -define double @constant_fold_copysign_f64_02() #0 { - %x = call double @llvm.copysign.f64(double -1.0, double 2.0) #0 - ret double %x -} - -; CHECK-LABEL: @constant_fold_copysign_f64_03 -; CHECK-NEXT: ret double -1.000000e+00 -define double @constant_fold_copysign_f64_03() #0 { - %x = call double @llvm.copysign.f64(double -1.0, double -2.0) #0 - ret double %x -} - - -attributes #0 = { nounwind readnone } From 48a3a1e090611d1a71cb3c027e9316d048a67324 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 26 Nov 2019 17:23:30 -0500 Subject: [PATCH 072/591] [InstSimplify] add tests for copysign; NFC --- 
llvm/test/Transforms/InstSimplify/call.ll | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/llvm/test/Transforms/InstSimplify/call.ll b/llvm/test/Transforms/InstSimplify/call.ll index 2fc0841a8e15b..b0f35ab371550 100644 --- a/llvm/test/Transforms/InstSimplify/call.ll +++ b/llvm/test/Transforms/InstSimplify/call.ll @@ -920,3 +920,24 @@ define double @fmuladd_nan_addend_neginf_inf(double %x, i1 %y) { %r = call double @llvm.fmuladd.f64(double %notnan, double 0xfff0000000000000, double 0x7ff0000000000000) ret double %r } + +declare float @llvm.copysign.f32(float, float) +declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) + +define float @copysign_same_operand(float %x) { +; CHECK-LABEL: @copysign_same_operand( +; CHECK-NEXT: [[R:%.*]] = call float @llvm.copysign.f32(float [[X:%.*]], float [[X]]) +; CHECK-NEXT: ret float [[R]] +; + %r = call float @llvm.copysign.f32(float %x, float %x) + ret float %r +} + +define <2 x double> @copysign_same_operand_vec(<2 x double> %x) { +; CHECK-LABEL: @copysign_same_operand_vec( +; CHECK-NEXT: [[R:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[X:%.*]], <2 x double> [[X]]) +; CHECK-NEXT: ret <2 x double> [[R]] +; + %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %x, <2 x double> %x) + ret <2 x double> %r +} From e177c5a00da34ba61b762e2b32bd96e33b0c10b4 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 26 Nov 2019 17:35:10 -0500 Subject: [PATCH 073/591] [InstSimplify] fold copysign with same args to the arg This is correct for any value including NaN/inf. We don't have this fold directly in the backend either, but x86 manages to get it after converting things to bitops. 
--- llvm/lib/Analysis/InstructionSimplify.cpp | 5 +++++ llvm/test/Transforms/InstSimplify/call.ll | 6 ++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index d997acb365c47..7942cb09e84c9 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5086,6 +5086,11 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, return Op0; } break; + case Intrinsic::copysign: + // copysign X, X --> X + if (Op0 == Op1) + return Op0; + break; case Intrinsic::maxnum: case Intrinsic::minnum: case Intrinsic::maximum: diff --git a/llvm/test/Transforms/InstSimplify/call.ll b/llvm/test/Transforms/InstSimplify/call.ll index b0f35ab371550..4736adb972d9a 100644 --- a/llvm/test/Transforms/InstSimplify/call.ll +++ b/llvm/test/Transforms/InstSimplify/call.ll @@ -926,8 +926,7 @@ declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) define float @copysign_same_operand(float %x) { ; CHECK-LABEL: @copysign_same_operand( -; CHECK-NEXT: [[R:%.*]] = call float @llvm.copysign.f32(float [[X:%.*]], float [[X]]) -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: ret float [[X:%.*]] ; %r = call float @llvm.copysign.f32(float %x, float %x) ret float %r @@ -935,8 +934,7 @@ define float @copysign_same_operand(float %x) { define <2 x double> @copysign_same_operand_vec(<2 x double> %x) { ; CHECK-LABEL: @copysign_same_operand_vec( -; CHECK-NEXT: [[R:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[X:%.*]], <2 x double> [[X]]) -; CHECK-NEXT: ret <2 x double> [[R]] +; CHECK-NEXT: ret <2 x double> [[X:%.*]] ; %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %x, <2 x double> %x) ret <2 x double> %r From 6c92cdff72251a7d13ab3958b04fba72dfcaebb1 Mon Sep 17 00:00:00 2001 From: Dan McGregor Date: Tue, 26 Nov 2019 14:23:07 -0800 Subject: [PATCH 074/591] Initial implementation of -fmacro-prefix-map and 
-ffile-prefix-map GCC 8 implements -fmacro-prefix-map. Like -fdebug-prefix-map, it replaces a string prefix for the __FILE__ macro. -ffile-prefix-map is the union of -fdebug-prefix-map and -fmacro-prefix-map Reviewed By: rnk, Lekensteyn, maskray Differential Revision: https://reviews.llvm.org/D49466 --- .../clang/Basic/DiagnosticDriverKinds.td | 4 +- clang/include/clang/Driver/Options.td | 6 +++ clang/include/clang/Lex/PreprocessorOptions.h | 5 +++ clang/lib/CodeGen/CGDebugInfo.cpp | 8 ++-- clang/lib/CodeGen/CGDebugInfo.h | 3 +- clang/lib/Driver/ToolChains/Clang.cpp | 23 ++++++++++- clang/lib/Driver/ToolChains/FreeBSD.cpp | 15 +++++++ clang/lib/Driver/ToolChains/Gnu.cpp | 13 ++++++ clang/lib/Frontend/CompilerInvocation.cpp | 3 ++ clang/lib/Lex/PPMacroExpansion.cpp | 15 ++++++- clang/test/CodeGen/debug-prefix-map.c | 2 + clang/test/Driver/debug-prefix-map.S | 1 + clang/test/Driver/debug-prefix-map.c | 37 +++++++++++++---- clang/test/Preprocessor/file_test.c | 22 ++++++++++ clang/test/Preprocessor/file_test.h | 2 + llvm/include/llvm/Support/Path.h | 23 +++++++++-- llvm/lib/Support/Path.cpp | 41 +++++++++++++++---- llvm/unittests/Support/Path.cpp | 29 +++++++++++++ 18 files changed, 221 insertions(+), 31 deletions(-) create mode 100644 clang/test/Preprocessor/file_test.c create mode 100644 clang/test/Preprocessor/file_test.h diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 0e309909030ed..b4904bb9d2dc0 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -143,8 +143,8 @@ def err_drv_missing_arg_mtp : Error< "missing argument to '%0'">; def err_drv_invalid_libcxx_deployment : Error< "invalid deployment target for -stdlib=libc++ (requires %0 or later)">; -def err_drv_invalid_argument_to_fdebug_prefix_map : Error< - "invalid argument '%0' to -fdebug-prefix-map">; +def err_drv_invalid_argument_to_option : Error< + "invalid 
argument '%0' to -%1">; def err_drv_malformed_sanitizer_blacklist : Error< "malformed sanitizer blacklist: '%0'">; def err_drv_duplicate_config : Error< diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 1f0fc97b14e2b..2d501c09c7628 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1964,6 +1964,12 @@ def fdebug_prefix_map_EQ : Joined<["-"], "fdebug-prefix-map=">, Group, Flags<[CC1Option,CC1AsOption]>, HelpText<"remap file source paths in debug info">; +def ffile_prefix_map_EQ + : Joined<["-"], "ffile-prefix-map=">, Group, Flags<[CC1Option]>, + HelpText<"remap file source paths in debug info and predefined preprocessor macros">; +def fmacro_prefix_map_EQ + : Joined<["-"], "fmacro-prefix-map=">, Group, Flags<[CC1Option]>, + HelpText<"remap file source paths in predefined preprocessor macros">; def fforce_dwarf_frame : Flag<["-"], "fforce-dwarf-frame">, Group, Flags<[CC1Option]>, HelpText<"Always emit a debug frame section">; def fno_force_dwarf_frame : Flag<["-"], "fno-force-dwarf-frame">, Group, Flags<[CC1Option]>, diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h index 344afa8941723..abffbd03c3b48 100644 --- a/clang/include/clang/Lex/PreprocessorOptions.h +++ b/clang/include/clang/Lex/PreprocessorOptions.h @@ -13,6 +13,8 @@ #include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" +#include +#include #include #include #include @@ -173,6 +175,9 @@ class PreprocessorOptions { /// build it again. std::shared_ptr FailedModules; + /// A prefix map for __FILE__ and __BASE_FILE__. + std::map> MacroPrefixMap; + /// Contains the currently active skipped range mappings for skipping excluded /// conditional directives. 
/// diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index db5893a7b51f2..282a8e44d3861 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -476,10 +476,12 @@ CGDebugInfo::createFile(StringRef FileName, } std::string CGDebugInfo::remapDIPath(StringRef Path) const { + SmallString<256> p = Path; for (const auto &Entry : DebugPrefixMap) - if (Path.startswith(Entry.first)) - return (Twine(Entry.second) + Path.substr(Entry.first.size())).str(); - return Path.str(); + if (llvm::sys::path::replace_path_prefix( + p, Entry.first, Entry.second, llvm::sys::path::Style::native, true)) + break; + return p.str(); } unsigned CGDebugInfo::getLineNumber(SourceLocation Loc) { diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h index 13e9c7a38fccd..8e74f7e019655 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -83,7 +83,8 @@ class CGDebugInfo { /// Cache of previously constructed Types. llvm::DenseMap TypeCache; - llvm::SmallDenseMap DebugPrefixMap; + std::map> + DebugPrefixMap; /// Cache that maps VLA types to size expressions for that type, /// represented by instantiated Metadata nodes. diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 3e00c323fc65b..03ebef550cde7 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -628,16 +628,33 @@ static void addDebugCompDirArg(const ArgList &Args, ArgStringList &CmdArgs, /// Add a CC1 and CC1AS option to specify the debug file path prefix map. 
static void addDebugPrefixMapArg(const Driver &D, const ArgList &Args, ArgStringList &CmdArgs) { - for (const Arg *A : Args.filtered(options::OPT_fdebug_prefix_map_EQ)) { + for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ, + options::OPT_fdebug_prefix_map_EQ)) { StringRef Map = A->getValue(); if (Map.find('=') == StringRef::npos) - D.Diag(diag::err_drv_invalid_argument_to_fdebug_prefix_map) << Map; + D.Diag(diag::err_drv_invalid_argument_to_option) + << Map << A->getOption().getName(); else CmdArgs.push_back(Args.MakeArgString("-fdebug-prefix-map=" + Map)); A->claim(); } } +/// Add a CC1 and CC1AS option to specify the macro file path prefix map. +static void addMacroPrefixMapArg(const Driver &D, const ArgList &Args, + ArgStringList &CmdArgs) { + for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ, + options::OPT_fmacro_prefix_map_EQ)) { + StringRef Map = A->getValue(); + if (Map.find('=') == StringRef::npos) + D.Diag(diag::err_drv_invalid_argument_to_option) + << Map << A->getOption().getName(); + else + CmdArgs.push_back(Args.MakeArgString("-fmacro-prefix-map=" + Map)); + A->claim(); + } +} + /// Vectorize at all optimization levels greater than 1 except for -Oz. /// For -Oz the loop vectorizer is disabled, while the slp vectorizer is /// enabled. @@ -1343,6 +1360,8 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA, // For IAMCU add special include arguments. getToolChain().AddIAMCUIncludeArgs(Args, CmdArgs); } + + addMacroPrefixMapArg(D, Args, CmdArgs); } // FIXME: Move to target hook. 
diff --git a/clang/lib/Driver/ToolChains/FreeBSD.cpp b/clang/lib/Driver/ToolChains/FreeBSD.cpp index 3e5e8a00652d3..85e94fe018e6a 100644 --- a/clang/lib/Driver/ToolChains/FreeBSD.cpp +++ b/clang/lib/Driver/ToolChains/FreeBSD.cpp @@ -12,6 +12,7 @@ #include "Arch/Sparc.h" #include "CommonArgs.h" #include "clang/Driver/Compilation.h" +#include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" #include "llvm/Option/ArgList.h" @@ -30,6 +31,7 @@ void freebsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const char *LinkingOutput) const { claimNoWarnArgs(Args); ArgStringList CmdArgs; + const auto &D = getToolChain().getDriver(); // When building 32-bit code on FreeBSD/amd64, we have to explicitly // instruct as in the base system to assemble 32-bit code. @@ -103,6 +105,19 @@ void freebsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA, } } + for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ, + options::OPT_fdebug_prefix_map_EQ)) { + StringRef Map = A->getValue(); + if (Map.find('=') == StringRef::npos) + D.Diag(diag::err_drv_invalid_argument_to_option) + << Map << A->getOption().getName(); + else { + CmdArgs.push_back(Args.MakeArgString("--debug-prefix-map")); + CmdArgs.push_back(Args.MakeArgString(Map)); + } + A->claim(); + } + Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); CmdArgs.push_back("-o"); diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 4c6956d54b7a5..1a729f112bd3c 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -868,6 +868,19 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C, } } + for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ, + options::OPT_fdebug_prefix_map_EQ)) { + StringRef Map = A->getValue(); + if (Map.find('=') == StringRef::npos) + D.Diag(diag::err_drv_invalid_argument_to_option) + << Map << 
A->getOption().getName(); + else { + CmdArgs.push_back(Args.MakeArgString("--debug-prefix-map")); + CmdArgs.push_back(Args.MakeArgString(Map)); + } + A->claim(); + } + Args.AddAllArgs(CmdArgs, options::OPT_I); Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 080320cf60806..56f6653755754 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3315,6 +3315,9 @@ static void ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args, for (const auto *A : Args.filtered(OPT_error_on_deserialized_pch_decl)) Opts.DeserializedPCHDeclsToErrorOn.insert(A->getValue()); + for (const auto &A : Args.getAllArgValues(OPT_fmacro_prefix_map_EQ)) + Opts.MacroPrefixMap.insert(StringRef(A).split('=')); + if (const Arg *A = Args.getLastArg(OPT_preamble_bytes_EQ)) { StringRef Value(A->getValue()); size_t Comma = Value.find(','); diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index a69c4dbb3a2ac..3b53d07cc4a9c 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -29,6 +29,7 @@ #include "clang/Lex/MacroInfo.h" #include "clang/Lex/Preprocessor.h" #include "clang/Lex/PreprocessorLexer.h" +#include "clang/Lex/PreprocessorOptions.h" #include "clang/Lex/Token.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" @@ -1450,6 +1451,17 @@ static bool isTargetEnvironment(const TargetInfo &TI, return TI.getTriple().getEnvironment() == Env.getEnvironment(); } +static void remapMacroPath( + SmallString<256> &Path, + const std::map> + &MacroPrefixMap) { + for (const auto &Entry : MacroPrefixMap) + if (llvm::sys::path::replace_path_prefix(Path, Entry.first, Entry.second, + llvm::sys::path::Style::native, + true)) + break; +} + /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded /// as a builtin macro, 
handle it and return the next token as 'Tok'. void Preprocessor::ExpandBuiltinMacro(Token &Tok) { @@ -1516,7 +1528,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { } // Escape this filename. Turn '\' -> '\\' '"' -> '\"' - SmallString<128> FN; + SmallString<256> FN; if (PLoc.isValid()) { // __FILE_NAME__ is a Clang-specific extension that expands to the // the last part of __FILE__. @@ -1532,6 +1544,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { FN += PLoc.getFilename(); } Lexer::Stringify(FN); + remapMacroPath(FN, PPOpts->MacroPrefixMap); OS << '"' << FN << '"'; } Tok.setKind(tok::string_literal); diff --git a/clang/test/CodeGen/debug-prefix-map.c b/clang/test/CodeGen/debug-prefix-map.c index d6032a658c2e2..abebc9a15106c 100644 --- a/clang/test/CodeGen/debug-prefix-map.c +++ b/clang/test/CodeGen/debug-prefix-map.c @@ -2,6 +2,8 @@ // RUN: %clang_cc1 -debug-info-kind=standalone -fdebug-prefix-map=%p=/UNLIKELY_PATH=empty %s -emit-llvm -o - | FileCheck %s -check-prefix CHECK-EVIL // RUN: %clang_cc1 -debug-info-kind=standalone -fdebug-prefix-map=%p=/UNLIKELY_PATH/empty %s -emit-llvm -o - -main-file-name debug-prefix-map.c | FileCheck %s // RUN: %clang_cc1 -debug-info-kind=standalone -fdebug-prefix-map=%p=/UNLIKELY_PATH/empty %s -emit-llvm -o - -fdebug-compilation-dir %p | FileCheck %s -check-prefix CHECK-COMPILATION-DIR +// RUN: %clang -g -fdebug-prefix-map=%p=/UNLIKELY_PATH/empty -S -c %s -emit-llvm -o - | FileCheck %s +// RUN: %clang -g -ffile-prefix-map=%p=/UNLIKELY_PATH/empty -S -c %s -emit-llvm -o - | FileCheck %s #include "Inputs/stdio.h" diff --git a/clang/test/Driver/debug-prefix-map.S b/clang/test/Driver/debug-prefix-map.S index 2ba66be0edfce..7d12a17479726 100644 --- a/clang/test/Driver/debug-prefix-map.S +++ b/clang/test/Driver/debug-prefix-map.S @@ -1,4 +1,5 @@ // RUN: %clang -### -g -fdebug-prefix-map=old=new %s 2>&1 | FileCheck %s +// RUN: %clang -### -g -ffile-prefix-map=old=new %s 2>&1 | FileCheck %s // CHECK: cc1as // CHECK-SAME: 
-fdebug-prefix-map=old=new diff --git a/clang/test/Driver/debug-prefix-map.c b/clang/test/Driver/debug-prefix-map.c index b4f3859f982ab..f2c87cb7c11c9 100644 --- a/clang/test/Driver/debug-prefix-map.c +++ b/clang/test/Driver/debug-prefix-map.c @@ -1,9 +1,28 @@ -// RUN: %clang -### -fdebug-prefix-map=old %s 2>&1 | FileCheck %s -check-prefix CHECK-INVALID -// RUN: %clang -### -fdebug-prefix-map=old=new %s 2>&1 | FileCheck %s -check-prefix CHECK-SIMPLE -// RUN: %clang -### -fdebug-prefix-map=old=n=ew %s 2>&1 | FileCheck %s -check-prefix CHECK-COMPLEX -// RUN: %clang -### -fdebug-prefix-map=old= %s 2>&1 | FileCheck %s -check-prefix CHECK-EMPTY - -// CHECK-INVALID: error: invalid argument 'old' to -fdebug-prefix-map -// CHECK-SIMPLE: fdebug-prefix-map=old=new -// CHECK-COMPLEX: fdebug-prefix-map=old=n=ew -// CHECK-EMPTY: fdebug-prefix-map=old= +// RUN: %clang -### -fdebug-prefix-map=old %s 2>&1 | FileCheck %s -check-prefix CHECK-DEBUG-INVALID +// RUN: %clang -### -fmacro-prefix-map=old %s 2>&1 | FileCheck %s -check-prefix CHECK-MACRO-INVALID +// RUN: %clang -### -ffile-prefix-map=old %s 2>&1 | FileCheck %s -check-prefix CHECK-FILE-INVALID + +// RUN: %clang -### -fdebug-prefix-map=old=new %s 2>&1 | FileCheck %s -check-prefix CHECK-DEBUG-SIMPLE +// RUN: %clang -### -fmacro-prefix-map=old=new %s 2>&1 | FileCheck %s -check-prefix CHECK-MACRO-SIMPLE +// RUN: %clang -### -ffile-prefix-map=old=new %s 2>&1 | FileCheck %s -check-prefix CHECK-DEBUG-SIMPLE +// RUN: %clang -### -ffile-prefix-map=old=new %s 2>&1 | FileCheck %s -check-prefix CHECK-MACRO-SIMPLE + +// RUN: %clang -### -fdebug-prefix-map=old=n=ew %s 2>&1 | FileCheck %s -check-prefix CHECK-DEBUG-COMPLEX +// RUN: %clang -### -fmacro-prefix-map=old=n=ew %s 2>&1 | FileCheck %s -check-prefix CHECK-MACRO-COMPLEX +// RUN: %clang -### -ffile-prefix-map=old=n=ew %s 2>&1 | FileCheck %s -check-prefix CHECK-DEBUG-COMPLEX +// RUN: %clang -### -ffile-prefix-map=old=n=ew %s 2>&1 | FileCheck %s -check-prefix CHECK-MACRO-COMPLEX + +// 
RUN: %clang -### -fdebug-prefix-map=old= %s 2>&1 | FileCheck %s -check-prefix CHECK-DEBUG-EMPTY +// RUN: %clang -### -fmacro-prefix-map=old= %s 2>&1 | FileCheck %s -check-prefix CHECK-MACRO-EMPTY +// RUN: %clang -### -ffile-prefix-map=old= %s 2>&1 | FileCheck %s -check-prefix CHECK-DEBUG-EMPTY +// RUN: %clang -### -ffile-prefix-map=old= %s 2>&1 | FileCheck %s -check-prefix CHECK-MACRO-EMPTY + +// CHECK-DEBUG-INVALID: error: invalid argument 'old' to -fdebug-prefix-map +// CHECK-MACRO-INVALID: error: invalid argument 'old' to -fmacro-prefix-map +// CHECK-FILE-INVALID: error: invalid argument 'old' to -ffile-prefix-map +// CHECK-DEBUG-SIMPLE: fdebug-prefix-map=old=new +// CHECK-MACRO-SIMPLE: fmacro-prefix-map=old=new +// CHECK-DEBUG-COMPLEX: fdebug-prefix-map=old=n=ew +// CHECK-MACRO-COMPLEX: fmacro-prefix-map=old=n=ew +// CHECK-DEBUG-EMPTY: fdebug-prefix-map=old= +// CHECK-MACRO-EMPTY: fmacro-prefix-map=old= diff --git a/clang/test/Preprocessor/file_test.c b/clang/test/Preprocessor/file_test.c new file mode 100644 index 0000000000000..bdc5f1df65993 --- /dev/null +++ b/clang/test/Preprocessor/file_test.c @@ -0,0 +1,22 @@ +// RUN: %clang -E -ffile-prefix-map=%p=/UNLIKELY_PATH/empty -c -o - %s | FileCheck %s +// RUN: %clang -E -fmacro-prefix-map=%p=/UNLIKELY_PATH/empty -c -o - %s | FileCheck %s +// RUN: %clang -E -fmacro-prefix-map=%p=/UNLIKELY_PATH=empty -c -o - %s | FileCheck %s -check-prefix CHECK-EVIL +// RUN: %clang -E -fmacro-prefix-map=%p/= -c -o - %s | FileCheck %s --check-prefix CHECK-REMOVE + +filename: __FILE__ +#include "file_test.h" + +// CHECK: filename: "/UNLIKELY_PATH/empty{{[/\\]}}file_test.c" +// CHECK: filename: "/UNLIKELY_PATH/empty{{[/\\]}}file_test.h" +// CHECK: basefile: "/UNLIKELY_PATH/empty{{[/\\]}}file_test.c" +// CHECK-NOT: filename: + +// CHECK-EVIL: filename: "/UNLIKELY_PATH=empty{{[/\\]}}file_test.c" +// CHECK-EVIL: filename: "/UNLIKELY_PATH=empty{{[/\\]}}file_test.h" +// CHECK-EVIL: basefile: "/UNLIKELY_PATH=empty{{[/\\]}}file_test.c" +// 
CHECK-EVIL-NOT: filename: + +// CHECK-REMOVE: filename: "file_test.c" +// CHECK-REMOVE: filename: "file_test.h" +// CHECK-REMOVE: basefile: "file_test.c" +// CHECK-REMOVE-NOT: filename: diff --git a/clang/test/Preprocessor/file_test.h b/clang/test/Preprocessor/file_test.h new file mode 100644 index 0000000000000..c289e5c836280 --- /dev/null +++ b/clang/test/Preprocessor/file_test.h @@ -0,0 +1,2 @@ +filename: __FILE__ +basefile: __BASE_FILE__ diff --git a/llvm/include/llvm/Support/Path.h b/llvm/include/llvm/Support/Path.h index 488f17427fd7f..97955f882d51e 100644 --- a/llvm/include/llvm/Support/Path.h +++ b/llvm/include/llvm/Support/Path.h @@ -152,18 +152,33 @@ void replace_extension(SmallVectorImpl &path, const Twine &extension, /// /// @code /// /foo, /old, /new => /foo +/// /old, /old, /new => /new +/// /old, /old/, /new, false => /old +/// /old, /old/, /new, true => /new /// /old/foo, /old, /new => /new/foo +/// /old/foo, /old/, /new => /new/foo +/// /old/foo, /old/, /new/ => /new/foo +/// /oldfoo, /old, /new => /oldfoo /// /foo, , /new => /new/foo -/// /old/foo, /old, => /foo +/// /foo, , new => new/foo +/// /old/foo, /old, , false => /foo +/// /old/foo, /old, , true => foo /// @endcode /// /// @param Path If \a Path starts with \a OldPrefix modify to instead /// start with \a NewPrefix. -/// @param OldPrefix The path prefix to strip from \a Path. +/// @param OldPrefix The path prefix to strip from \a Path. Any trailing +/// path separator is ignored if strict is true. /// @param NewPrefix The path prefix to replace \a NewPrefix with. -void replace_path_prefix(SmallVectorImpl &Path, +/// @param style The path separator style +/// @param strict If strict is true, a directory separator following +/// \a OldPrefix will also be stripped. Otherwise, directory +/// separators will only be matched and stripped when present +/// in \a OldPrefix. 
+/// @result true if \a Path begins with OldPrefix +bool replace_path_prefix(SmallVectorImpl &Path, const StringRef &OldPrefix, const StringRef &NewPrefix, - Style style = Style::native); + Style style = Style::native, bool strict = false); /// Append to path. /// diff --git a/llvm/lib/Support/Path.cpp b/llvm/lib/Support/Path.cpp index 14def83802daf..3c9a08cb4077d 100644 --- a/llvm/lib/Support/Path.cpp +++ b/llvm/lib/Support/Path.cpp @@ -496,27 +496,50 @@ void replace_extension(SmallVectorImpl &path, const Twine &extension, path.append(ext.begin(), ext.end()); } -void replace_path_prefix(SmallVectorImpl &Path, +bool replace_path_prefix(SmallVectorImpl &Path, const StringRef &OldPrefix, const StringRef &NewPrefix, - Style style) { + Style style, bool strict) { if (OldPrefix.empty() && NewPrefix.empty()) - return; + return false; StringRef OrigPath(Path.begin(), Path.size()); - if (!OrigPath.startswith(OldPrefix)) - return; + StringRef OldPrefixDir; + + if (!strict && OldPrefix.size() > OrigPath.size()) + return false; + + // Ensure OldPrefixDir does not have a trailing separator. + if (!OldPrefix.empty() && is_separator(OldPrefix.back())) + OldPrefixDir = parent_path(OldPrefix, style); + else + OldPrefixDir = OldPrefix; + + if (!OrigPath.startswith(OldPrefixDir)) + return false; + + if (OrigPath.size() > OldPrefixDir.size()) + if (!is_separator(OrigPath[OldPrefixDir.size()], style) && strict) + return false; // If prefixes have the same size we can simply copy the new one over. 
- if (OldPrefix.size() == NewPrefix.size()) { + if (OldPrefixDir.size() == NewPrefix.size() && !strict) { llvm::copy(NewPrefix, Path.begin()); - return; + return true; } - StringRef RelPath = OrigPath.substr(OldPrefix.size()); + StringRef RelPath = OrigPath.substr(OldPrefixDir.size()); SmallString<256> NewPath; path::append(NewPath, style, NewPrefix); - path::append(NewPath, style, RelPath); + if (!RelPath.empty()) { + if (!is_separator(RelPath[0], style) || !strict) + path::append(NewPath, style, RelPath); + else + path::append(NewPath, style, relative_path(RelPath, style)); + } + Path.swap(NewPath); + + return true; } void native(const Twine &path, SmallVectorImpl &result, Style style) { diff --git a/llvm/unittests/Support/Path.cpp b/llvm/unittests/Support/Path.cpp index 9de46a689cd72..1f7a10d94f292 100644 --- a/llvm/unittests/Support/Path.cpp +++ b/llvm/unittests/Support/Path.cpp @@ -1230,7 +1230,9 @@ TEST(Support, RemoveDots) { TEST(Support, ReplacePathPrefix) { SmallString<64> Path1("/foo"); SmallString<64> Path2("/old/foo"); + SmallString<64> Path3("/oldnew/foo"); SmallString<64> OldPrefix("/old"); + SmallString<64> OldPrefixSep("/old/"); SmallString<64> NewPrefix("/new"); SmallString<64> NewPrefix2("/longernew"); SmallString<64> EmptyPrefix(""); @@ -1250,6 +1252,33 @@ TEST(Support, ReplacePathPrefix) { Path = Path2; path::replace_path_prefix(Path, OldPrefix, EmptyPrefix); EXPECT_EQ(Path, "/foo"); + Path = Path2; + path::replace_path_prefix(Path, OldPrefix, EmptyPrefix, true); + EXPECT_EQ(Path, "foo"); + Path = Path3; + path::replace_path_prefix(Path, OldPrefix, NewPrefix, false); + EXPECT_EQ(Path, "/newnew/foo"); + Path = Path3; + path::replace_path_prefix(Path, OldPrefix, NewPrefix, true); + EXPECT_EQ(Path, "/oldnew/foo"); + Path = Path3; + path::replace_path_prefix(Path, OldPrefixSep, NewPrefix, true); + EXPECT_EQ(Path, "/oldnew/foo"); + Path = Path1; + path::replace_path_prefix(Path, EmptyPrefix, NewPrefix); + EXPECT_EQ(Path, "/new/foo"); + Path = 
OldPrefix; + path::replace_path_prefix(Path, OldPrefix, NewPrefix); + EXPECT_EQ(Path, "/new"); + Path = OldPrefixSep; + path::replace_path_prefix(Path, OldPrefix, NewPrefix); + EXPECT_EQ(Path, "/new/"); + Path = OldPrefix; + path::replace_path_prefix(Path, OldPrefixSep, NewPrefix, false); + EXPECT_EQ(Path, "/old"); + Path = OldPrefix; + path::replace_path_prefix(Path, OldPrefixSep, NewPrefix, true); + EXPECT_EQ(Path, "/new"); } TEST_F(FileSystemTest, OpenFileForRead) { From fc6a6900cf8970e54192fae2e32e38ec4f9bec2e Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 26 Nov 2019 15:34:48 -0800 Subject: [PATCH 075/591] [unittest] Fix unittests/Support/Path.cpp after D49466 --- llvm/unittests/Support/Path.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/llvm/unittests/Support/Path.cpp b/llvm/unittests/Support/Path.cpp index 1f7a10d94f292..b143ea6d20630 100644 --- a/llvm/unittests/Support/Path.cpp +++ b/llvm/unittests/Support/Path.cpp @@ -1253,16 +1253,20 @@ TEST(Support, ReplacePathPrefix) { path::replace_path_prefix(Path, OldPrefix, EmptyPrefix); EXPECT_EQ(Path, "/foo"); Path = Path2; - path::replace_path_prefix(Path, OldPrefix, EmptyPrefix, true); + path::replace_path_prefix(Path, OldPrefix, EmptyPrefix, path::Style::native, + true); EXPECT_EQ(Path, "foo"); Path = Path3; - path::replace_path_prefix(Path, OldPrefix, NewPrefix, false); + path::replace_path_prefix(Path, OldPrefix, NewPrefix, path::Style::native, + false); EXPECT_EQ(Path, "/newnew/foo"); Path = Path3; - path::replace_path_prefix(Path, OldPrefix, NewPrefix, true); + path::replace_path_prefix(Path, OldPrefix, NewPrefix, path::Style::native, + true); EXPECT_EQ(Path, "/oldnew/foo"); Path = Path3; - path::replace_path_prefix(Path, OldPrefixSep, NewPrefix, true); + path::replace_path_prefix(Path, OldPrefixSep, NewPrefix, path::Style::native, + true); EXPECT_EQ(Path, "/oldnew/foo"); Path = Path1; path::replace_path_prefix(Path, EmptyPrefix, NewPrefix); @@ -1274,10 
+1278,12 @@ TEST(Support, ReplacePathPrefix) { path::replace_path_prefix(Path, OldPrefix, NewPrefix); EXPECT_EQ(Path, "/new/"); Path = OldPrefix; - path::replace_path_prefix(Path, OldPrefixSep, NewPrefix, false); + path::replace_path_prefix(Path, OldPrefixSep, NewPrefix, path::Style::native, + false); EXPECT_EQ(Path, "/old"); Path = OldPrefix; - path::replace_path_prefix(Path, OldPrefixSep, NewPrefix, true); + path::replace_path_prefix(Path, OldPrefixSep, NewPrefix, path::Style::native, + true); EXPECT_EQ(Path, "/new"); } From df773ebb5f8ff54cda385f4491ff877464228c18 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 26 Nov 2019 15:39:33 -0800 Subject: [PATCH 076/591] [X86] Add test cases for constrained lrint/llrint/lround/llround to fp128-libcalls-strict. NFC --- .../test/CodeGen/X86/fp128-libcalls-strict.ll | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll index a37adcb107c39..9f5c32421dcc1 100644 --- a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll +++ b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll @@ -296,6 +296,54 @@ entry: ret fp128 %trunc } +define i32 @lrint(fp128 %x) nounwind strictfp { +; CHECK-LABEL: lrint: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq lrintl +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq +entry: + %rint = call i32 @llvm.experimental.constrained.lrint.i32.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret i32 %rint +} + +define i64 @llrint(fp128 %x) nounwind strictfp { +; CHECK-LABEL: llrint: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq llrintl +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq +entry: + %rint = call i64 @llvm.experimental.constrained.llrint.i64.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret i64 %rint +} + +define i32 @lround(fp128 %x) nounwind strictfp { +; CHECK-LABEL: lround: +; CHECK: 
# %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq lroundl +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq +entry: + %round = call i32 @llvm.experimental.constrained.lround.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0 + ret i32 %round +} + +define i64 @llround(fp128 %x) nounwind strictfp { +; CHECK-LABEL: llround: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq llroundl +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq +entry: + %round = call i64 @llvm.experimental.constrained.llround.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0 + ret i64 %round +} + attributes #0 = { strictfp } declare fp128 @llvm.experimental.constrained.fadd.f128(fp128, fp128, metadata, metadata) @@ -322,3 +370,7 @@ declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata, metadat declare fp128 @llvm.experimental.constrained.sin.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.sqrt.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata, metadata) +declare i32 @llvm.experimental.constrained.lrint.i32.f128(fp128, metadata, metadata) +declare i64 @llvm.experimental.constrained.llrint.i64.f128(fp128, metadata, metadata) +declare i32 @llvm.experimental.constrained.lround.i32.f128(fp128, metadata) +declare i64 @llvm.experimental.constrained.llround.i64.f128(fp128, metadata) From 3bb24bf25767ef5bbcef958b484e7a06d8689204 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 26 Nov 2019 16:09:22 -0800 Subject: [PATCH 077/591] Fix tests on Windows after D49466 It is tricky to use replace_path_prefix correctly on Windows which uses backslashes as native path separators. Switch back to the old approach (startswith is not ideal) to appease build bots for now. 
--- clang/lib/CodeGen/CGDebugInfo.cpp | 8 +++----- clang/lib/Lex/PPMacroExpansion.cpp | 6 +++--- clang/test/CodeGen/debug-prefix-map.c | 12 ++++++------ 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 282a8e44d3861..db5893a7b51f2 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -476,12 +476,10 @@ CGDebugInfo::createFile(StringRef FileName, } std::string CGDebugInfo::remapDIPath(StringRef Path) const { - SmallString<256> p = Path; for (const auto &Entry : DebugPrefixMap) - if (llvm::sys::path::replace_path_prefix( - p, Entry.first, Entry.second, llvm::sys::path::Style::native, true)) - break; - return p.str(); + if (Path.startswith(Entry.first)) + return (Twine(Entry.second) + Path.substr(Entry.first.size())).str(); + return Path.str(); } unsigned CGDebugInfo::getLineNumber(SourceLocation Loc) { diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index 3b53d07cc4a9c..cf8bb2fbab991 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -1456,10 +1456,10 @@ static void remapMacroPath( const std::map> &MacroPrefixMap) { for (const auto &Entry : MacroPrefixMap) - if (llvm::sys::path::replace_path_prefix(Path, Entry.first, Entry.second, - llvm::sys::path::Style::native, - true)) + if (Path.startswith(Entry.first)) { + Path = (Twine(Entry.second) + Path.substr(Entry.first.size())).str(); break; + } } /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded diff --git a/clang/test/CodeGen/debug-prefix-map.c b/clang/test/CodeGen/debug-prefix-map.c index abebc9a15106c..5366e19447ae2 100644 --- a/clang/test/CodeGen/debug-prefix-map.c +++ b/clang/test/CodeGen/debug-prefix-map.c @@ -19,21 +19,21 @@ void test_rewrite_includes() { } // CHECK-NO-MAIN-FILE-NAME: !DIFile(filename: "/UNLIKELY_PATH/empty{{/|\\\\}}" -// CHECK-NO-MAIN-FILE-NAME: !DIFile(filename: 
"/UNLIKELY_PATH/empty{{[/\\]}}{{.*}}", +// CHECK-NO-MAIN-FILE-NAME: !DIFile(filename: "/UNLIKELY_PATH/empty{{/|\\\\}}{{.*}}", // On POSIX systems "Dir" should actually be empty, but on Windows we // can't recognize "/UNLIKELY_PATH" as being an absolute path. // CHECK-NO-MAIN-FILE-NAME-SAME: directory: "{{()|(.*:.*)}}") -// CHECK-NO-MAIN-FILE-NAME: !DIFile(filename: "/UNLIKELY_PATH/empty{{[/\\]}}Inputs/stdio.h", +// CHECK-NO-MAIN-FILE-NAME: !DIFile(filename: "/UNLIKELY_PATH/empty{{/|\\\\}}Inputs/stdio.h", // CHECK-NO-MAIN-FILE-NAME-SAME: directory: "{{()|(.*:.*)}}") // CHECK-NO-MAIN-FILE-NAME-NOT: !DIFile(filename: -// CHECK-EVIL: !DIFile(filename: "/UNLIKELY_PATH=empty{{[/\\]}}{{.*}}" -// CHECK-EVIL: !DIFile(filename: "/UNLIKELY_PATH=empty{{[/\\]}}{{.*}}Inputs/stdio.h", +// CHECK-EVIL: !DIFile(filename: "/UNLIKELY_PATH=empty{{/|\\\\}}{{.*}}" +// CHECK-EVIL: !DIFile(filename: "/UNLIKELY_PATH=empty{{/|\\\\}}{{.*}}Inputs/stdio.h", // CHECK-EVIL-SAME: directory: "{{()|(.*:.*)}}") // CHECK-EVIL-NOT: !DIFile(filename: -// CHECK: !DIFile(filename: "/UNLIKELY_PATH/empty{{[/\\]}}{{.*}}" -// CHECK: !DIFile(filename: "/UNLIKELY_PATH/empty{{[/\\]}}{{.*}}Inputs/stdio.h", +// CHECK: !DIFile(filename: "/UNLIKELY_PATH/empty{{/|\\\\}}{{.*}}" +// CHECK: !DIFile(filename: "/UNLIKELY_PATH/empty{{/|\\\\}}{{.*}}Inputs/stdio.h", // CHECK-SAME: directory: "{{()|(.*:.*)}}") // CHECK-NOT: !DIFile(filename: From ad58d1a9d117d46916bfff77aad0c369cee91cea Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 26 Nov 2019 16:18:29 -0800 Subject: [PATCH 078/591] [CodeMoverUtils] Don't dereference nullptr in test --- llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp b/llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp index 64c4f796cb819..0bb32a4d9dda5 100644 --- a/llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp +++ 
b/llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp @@ -142,7 +142,7 @@ TEST(CodeMoverUtils, BasicTest) { EXPECT_FALSE(isSafeToMoveBefore(*SI_A5, *Entry->getTerminator(), DT, PDT, DI)); // Moving PHINode is not supported. - EXPECT_FALSE(isSafeToMoveBefore(PN, *PN.getPrevNode(), DT, PDT, DI)); + EXPECT_FALSE(isSafeToMoveBefore(PN, *PN.getNextNode()->getNextNode(), DT, PDT, DI)); // Cannot move non-PHINode before PHINode. EXPECT_FALSE(isSafeToMoveBefore(*PN.getNextNode(), PN, DT, PDT, DI)); From 5e40f2cf0fec7975ef5d4c8f9fd7fce49c754c9c Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 26 Nov 2019 16:21:07 -0800 Subject: [PATCH 079/591] [CodeMoverUtils] clang-format the test --- .../Transforms/Utils/CodeMoverUtilsTest.cpp | 100 ++++++++++-------- 1 file changed, 55 insertions(+), 45 deletions(-) diff --git a/llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp b/llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp index 0bb32a4d9dda5..887c9c9558212 100644 --- a/llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp +++ b/llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp @@ -65,39 +65,38 @@ TEST(CodeMoverUtils, BasicTest) { // } // } std::unique_ptr M = parseIR( - C, - "define void @foo(i32* noalias %A, i32* noalias %B, i32* noalias %C\n" - " , i64 %N) {\n" - "entry:\n" - " %X = sdiv i64 1, %N\n" - " call void @safecall()\n" - " %cmp1 = icmp slt i64 0, %N\n" - " call void @unsafecall1()\n" - " call void @unsafecall2()\n" - " br i1 %cmp1, label %for.body, label %for.end\n" - "for.body:\n" - " %i = phi i64 [ 0, %entry ], [ %inc, %for.body ]\n" - " %arrayidx_A5 = getelementptr inbounds i32, i32* %A, i64 5\n" - " store i32 5, i32* %arrayidx_A5, align 4\n" - " %arrayidx_A = getelementptr inbounds i32, i32* %A, i64 %i\n" - " store i32 0, i32* %arrayidx_A, align 4\n" - " %load1 = load i32, i32* %arrayidx_A, align 4\n" - " %arrayidx_B = getelementptr inbounds i32, i32* %B, i64 %i\n" - " store i32 %load1, i32* %arrayidx_B, align 4\n" - " %load2 = load i32, 
i32* %arrayidx_A, align 4\n" - " %arrayidx_C = getelementptr inbounds i32, i32* %C, i64 %i\n" - " store i32 %load2, i32* %arrayidx_C, align 4\n" - " %arrayidx_A6 = getelementptr inbounds i32, i32* %A, i64 6\n" - " store i32 6, i32* %arrayidx_A6, align 4\n" - " %inc = add nsw i64 %i, 1\n" - " %cmp = icmp slt i64 %inc, %N\n" - " br i1 %cmp, label %for.body, label %for.end\n" - "for.end:\n" - " ret void\n" - "}\n" - "declare void @safecall() nounwind nosync willreturn\n" - "declare void @unsafecall1()\n" - "declare void @unsafecall2()\n"); + C, "define void @foo(i32* noalias %A, i32* noalias %B, i32* noalias %C\n" + " , i64 %N) {\n" + "entry:\n" + " %X = sdiv i64 1, %N\n" + " call void @safecall()\n" + " %cmp1 = icmp slt i64 0, %N\n" + " call void @unsafecall1()\n" + " call void @unsafecall2()\n" + " br i1 %cmp1, label %for.body, label %for.end\n" + "for.body:\n" + " %i = phi i64 [ 0, %entry ], [ %inc, %for.body ]\n" + " %arrayidx_A5 = getelementptr inbounds i32, i32* %A, i64 5\n" + " store i32 5, i32* %arrayidx_A5, align 4\n" + " %arrayidx_A = getelementptr inbounds i32, i32* %A, i64 %i\n" + " store i32 0, i32* %arrayidx_A, align 4\n" + " %load1 = load i32, i32* %arrayidx_A, align 4\n" + " %arrayidx_B = getelementptr inbounds i32, i32* %B, i64 %i\n" + " store i32 %load1, i32* %arrayidx_B, align 4\n" + " %load2 = load i32, i32* %arrayidx_A, align 4\n" + " %arrayidx_C = getelementptr inbounds i32, i32* %C, i64 %i\n" + " store i32 %load2, i32* %arrayidx_C, align 4\n" + " %arrayidx_A6 = getelementptr inbounds i32, i32* %A, i64 6\n" + " store i32 6, i32* %arrayidx_A6, align 4\n" + " %inc = add nsw i64 %i, 1\n" + " %cmp = icmp slt i64 %inc, %N\n" + " br i1 %cmp, label %for.body, label %for.end\n" + "for.end:\n" + " ret void\n" + "}\n" + "declare void @safecall() nounwind nosync willreturn\n" + "declare void @unsafecall1()\n" + "declare void @unsafecall2()\n"); run(*M, "foo", [&](Function &F, DominatorTree &DT, PostDominatorTree &PDT, @@ -106,9 +105,11 @@ 
TEST(CodeMoverUtils, BasicTest) { BasicBlock *Entry = &*(FI++); assert(Entry->getName() == "entry" && "Expecting BasicBlock entry"); Instruction *CI_safecall = Entry->front().getNextNode(); - assert(isa(CI_safecall) && "Expecting CI_safecall to be a CallInst"); + assert(isa(CI_safecall) && + "Expecting CI_safecall to be a CallInst"); Instruction *CI_unsafecall = CI_safecall->getNextNode()->getNextNode(); - assert(isa(CI_unsafecall) && "Expecting CI_unsafecall to be a CallInst"); + assert(isa(CI_unsafecall) && + "Expecting CI_unsafecall to be a CallInst"); BasicBlock *ForBody = &*(FI++); assert(ForBody->getName() == "for.body" && "Expecting BasicBlock for.body"); @@ -126,39 +127,48 @@ TEST(CodeMoverUtils, BasicTest) { assert(LI1->getName() == "load1" && "Expecting LI1 to be load1"); Instruction *LI2 = LI1->getNextNode()->getNextNode()->getNextNode(); assert(LI2->getName() == "load2" && "Expecting LI2 to be load2"); - Instruction *SI_A6 = LI2->getNextNode()->getNextNode()->getNextNode()->getNextNode(); + Instruction *SI_A6 = + LI2->getNextNode()->getNextNode()->getNextNode()->getNextNode(); assert(isa(SI_A6) && SI_A6->getOperand(1)->getName() == "arrayidx_A6" && "Expecting store to arrayidx_A6"); - // Can move after CI_safecall, as it does not throw, not synchronize, or must return. - EXPECT_TRUE(isSafeToMoveBefore(*CI_safecall->getPrevNode(), *CI_safecall->getNextNode(), DT, PDT, DI)); + // Can move after CI_safecall, as it does not throw, not synchronize, or + // must return. + EXPECT_TRUE(isSafeToMoveBefore(*CI_safecall->getPrevNode(), + *CI_safecall->getNextNode(), DT, PDT, + DI)); // Cannot move CI_unsafecall, as it may throw. - EXPECT_FALSE(isSafeToMoveBefore(*CI_unsafecall->getNextNode(), *CI_unsafecall, DT, PDT, DI)); + EXPECT_FALSE(isSafeToMoveBefore(*CI_unsafecall->getNextNode(), + *CI_unsafecall, DT, PDT, DI)); // Moving instruction to non control flow equivalent places are not // supported. 
- EXPECT_FALSE(isSafeToMoveBefore(*SI_A5, *Entry->getTerminator(), DT, PDT, DI)); + EXPECT_FALSE( + isSafeToMoveBefore(*SI_A5, *Entry->getTerminator(), DT, PDT, DI)); // Moving PHINode is not supported. - EXPECT_FALSE(isSafeToMoveBefore(PN, *PN.getNextNode()->getNextNode(), DT, PDT, DI)); + EXPECT_FALSE(isSafeToMoveBefore(PN, *PN.getNextNode()->getNextNode(), + DT, PDT, DI)); // Cannot move non-PHINode before PHINode. EXPECT_FALSE(isSafeToMoveBefore(*PN.getNextNode(), PN, DT, PDT, DI)); // Moving Terminator is not supported. - EXPECT_FALSE(isSafeToMoveBefore(*Entry->getTerminator(), *PN.getNextNode(), DT, - PDT, DI)); + EXPECT_FALSE(isSafeToMoveBefore(*Entry->getTerminator(), + *PN.getNextNode(), DT, PDT, DI)); // Cannot move %arrayidx_A after SI, as SI is its user. - EXPECT_FALSE(isSafeToMoveBefore(*SI->getPrevNode(), *SI->getNextNode(), DT, PDT, DI)); + EXPECT_FALSE(isSafeToMoveBefore(*SI->getPrevNode(), *SI->getNextNode(), + DT, PDT, DI)); // Cannot move SI before %arrayidx_A, as %arrayidx_A is its operand. EXPECT_FALSE(isSafeToMoveBefore(*SI, *SI->getPrevNode(), DT, PDT, DI)); // Cannot move LI2 after SI_A6, as there is a flow dependence. - EXPECT_FALSE(isSafeToMoveBefore(*LI2, *SI_A6->getNextNode(), DT, PDT, DI)); + EXPECT_FALSE( + isSafeToMoveBefore(*LI2, *SI_A6->getNextNode(), DT, PDT, DI)); // Cannot move SI after LI1, as there is a anti dependence. EXPECT_FALSE(isSafeToMoveBefore(*SI, *LI1->getNextNode(), DT, PDT, DI)); From 1260ea7421a377ed5dc5dc366c3f5a9537e8ebcd Mon Sep 17 00:00:00 2001 From: Jinsong Ji Date: Wed, 27 Nov 2019 00:30:29 +0000 Subject: [PATCH 080/591] [PowerPC] [NFC] rename PPCLoopPreIncPrep.cpp to PPCLoopInstrFormPrep.cpp after D67088 Summary: This is NFC code clean work after D67088. In that patch, we extend loop instructions prep for ds/dq form. This patch only changes the file name PPCLoopPreIncPrep.cpp to PPCLoopInstrFormPrep.cpp for better reviewing of the content change of file PPCLoopInstrFormPrep.cpp. 
Reviewers: #powerpc, nemanjai, steven.zhang, shchenz Reviewed By: #powerpc, shchenz Subscribers: wuzish, mgorny, hiraditya, kbarton, shchenz, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70716 --- llvm/lib/Target/PowerPC/CMakeLists.txt | 2 +- .../PowerPC/{PPCLoopPreIncPrep.cpp => PPCLoopInstrFormPrep.cpp} | 0 llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename llvm/lib/Target/PowerPC/{PPCLoopPreIncPrep.cpp => PPCLoopInstrFormPrep.cpp} (100%) diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt index 28d7840d54124..1893d6e32c9ac 100644 --- a/llvm/lib/Target/PowerPC/CMakeLists.txt +++ b/llvm/lib/Target/PowerPC/CMakeLists.txt @@ -29,7 +29,7 @@ add_llvm_target(PowerPCCodeGen PPCEarlyReturn.cpp PPCFastISel.cpp PPCFrameLowering.cpp - PPCLoopPreIncPrep.cpp + PPCLoopInstrFormPrep.cpp PPCMCInstLower.cpp PPCMachineFunctionInfo.cpp PPCMachineScheduler.cpp diff --git a/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp similarity index 100% rename from llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp rename to llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn index 51c78d26f1db5..b27a460dfba01 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/PowerPC/BUILD.gn @@ -53,7 +53,7 @@ static_library("LLVMPowerPCCodeGen") { "PPCISelDAGToDAG.cpp", "PPCISelLowering.cpp", "PPCInstrInfo.cpp", - "PPCLoopPreIncPrep.cpp", + "PPCLoopInstrFormPrep.cpp", "PPCLowerMASSVEntries.cpp", "PPCMCInstLower.cpp", "PPCMIPeephole.cpp", From 75fd939bb917e8f843395684a2970d86bc0199c0 Mon Sep 17 00:00:00 2001 From: Shoaib Meenai Date: Tue, 26 Nov 2019 17:17:21 -0800 Subject: [PATCH 081/591] [ELF] Adjust test to work for zlib 1.2.8 The previous 
data had the same length with compression levels 1 and 6 for zlib 1.2.8. Adjust the test to work for this library version. I've also tested this with zlib 1.2.7 and zlib 1.2.11. --- lld/test/ELF/compressed-debug-level.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lld/test/ELF/compressed-debug-level.test b/lld/test/ELF/compressed-debug-level.test index 38d7d9016d127..d755e9fedf13b 100644 --- a/lld/test/ELF/compressed-debug-level.test +++ b/lld/test/ELF/compressed-debug-level.test @@ -21,7 +21,7 @@ # HEADER: [Nr] Name Type Address Off Size # LEVEL1: [ 1] .debug_info PROGBITS 00000000 000094 00001c -# LEVEL6: [ 1] .debug_info PROGBITS 00000000 000094 00001b +# LEVEL6: [ 1] .debug_info PROGBITS 00000000 000094 00001a ## A little arbitrary debug section which has a different size after ## applying compression of level 1 and 6. @@ -35,4 +35,4 @@ FileHeader: Sections: - Name: .debug_info Type: SHT_PROGBITS - Content: '01010201020201020102' + Content: '010101010101010201010201' From 7ddc6287a08ef758d66acb20d006c9ab0c579fcc Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 26 Nov 2019 17:27:16 -0800 Subject: [PATCH 082/591] [Preprocessor] Fix backslash tests on Windows after D49466 See http://lab.llvm.org:8011/builders/llvm-clang-lld-x86_64-scei-ps4-windows10pro-fast/builds/29442/steps/test-check-all/logs/stdio --- clang/test/Preprocessor/file_test.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/clang/test/Preprocessor/file_test.c b/clang/test/Preprocessor/file_test.c index bdc5f1df65993..09ae48f48e097 100644 --- a/clang/test/Preprocessor/file_test.c +++ b/clang/test/Preprocessor/file_test.c @@ -6,14 +6,14 @@ filename: __FILE__ #include "file_test.h" -// CHECK: filename: "/UNLIKELY_PATH/empty{{[/\\]}}file_test.c" -// CHECK: filename: "/UNLIKELY_PATH/empty{{[/\\]}}file_test.h" -// CHECK: basefile: "/UNLIKELY_PATH/empty{{[/\\]}}file_test.c" +// CHECK: filename: "/UNLIKELY_PATH/empty{{/|\\\\}}file_test.c" +// 
CHECK: filename: "/UNLIKELY_PATH/empty{{/|\\\\}}file_test.h" +// CHECK: basefile: "/UNLIKELY_PATH/empty{{/|\\\\}}file_test.c" // CHECK-NOT: filename: -// CHECK-EVIL: filename: "/UNLIKELY_PATH=empty{{[/\\]}}file_test.c" -// CHECK-EVIL: filename: "/UNLIKELY_PATH=empty{{[/\\]}}file_test.h" -// CHECK-EVIL: basefile: "/UNLIKELY_PATH=empty{{[/\\]}}file_test.c" +// CHECK-EVIL: filename: "/UNLIKELY_PATH=empty{{/|\\\\}}file_test.c" +// CHECK-EVIL: filename: "/UNLIKELY_PATH=empty{{/|\\\\}}file_test.h" +// CHECK-EVIL: basefile: "/UNLIKELY_PATH=empty{{/|\\\\}}file_test.c" // CHECK-EVIL-NOT: filename: // CHECK-REMOVE: filename: "file_test.c" From 350565dbc06d32096ae3ade7bc3d2e58ac654273 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 26 Nov 2019 17:37:51 -0800 Subject: [PATCH 083/591] [LegalizeTypes] Add SoftenFloatOp_Unary to reduce some duplication for softening LRINT/LLRINT/LROUND/LLROUND Summary: This will be enhanced in a follow up to add strict fp support Reviewers: efriedma Reviewed By: efriedma Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70751 --- .../SelectionDAG/LegalizeFloatTypes.cpp | 89 ++++++++----------- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + 2 files changed, 36 insertions(+), 54 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 1ddf49c73e866..65ccb996299d1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -945,72 +945,53 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) { return DAG.getNode(ISD::FCOPYSIGN, dl, LVT, LHS, RHS); } -SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatOp_Unary(SDNode *N, RTLIB::Libcall LC) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - EVT RetVT = 
N->getOperand(0).getValueType(); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, - RTLIB::LROUND_F32, - RTLIB::LROUND_F64, - RTLIB::LROUND_F80, - RTLIB::LROUND_F128, - RTLIB::LROUND_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + EVT OpVT = N->getOperand(0).getValueType(); + CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); +SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) { + EVT OpVT = N->getOperand(0).getValueType(); + return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT, + RTLIB::LROUND_F32, + RTLIB::LROUND_F64, + RTLIB::LROUND_F80, + RTLIB::LROUND_F128, + RTLIB::LROUND_PPCF128)); +} - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - EVT RetVT = N->getOperand(0).getValueType(); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, - RTLIB::LLROUND_F32, - RTLIB::LLROUND_F64, - RTLIB::LLROUND_F80, - RTLIB::LLROUND_F128, - RTLIB::LLROUND_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; +SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) { + EVT OpVT = N->getOperand(0).getValueType(); + return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT, + RTLIB::LLROUND_F32, + RTLIB::LLROUND_F64, + RTLIB::LLROUND_F80, + RTLIB::LLROUND_F128, + RTLIB::LLROUND_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - EVT RetVT = 
N->getOperand(0).getValueType(); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, - RTLIB::LRINT_F32, - RTLIB::LRINT_F64, - RTLIB::LRINT_F80, - RTLIB::LRINT_F128, - RTLIB::LRINT_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + EVT OpVT = N->getOperand(0).getValueType(); + return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT, + RTLIB::LRINT_F32, + RTLIB::LRINT_F64, + RTLIB::LRINT_F80, + RTLIB::LRINT_F128, + RTLIB::LRINT_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - EVT RetVT = N->getOperand(0).getValueType(); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, - RTLIB::LLRINT_F32, - RTLIB::LLRINT_F64, - RTLIB::LLRINT_F80, - RTLIB::LLRINT_F128, - RTLIB::LLRINT_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + EVT OpVT = N->getOperand(0).getValueType(); + return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT, + RTLIB::LLRINT_F32, + RTLIB::LLRINT_F64, + RTLIB::LLRINT_F80, + RTLIB::LLRINT_F128, + RTLIB::LLRINT_PPCF128)); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 755a6fe909878..7274a521f468e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -530,6 +530,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { // Convert Float Operand to Integer. 
bool SoftenFloatOperand(SDNode *N, unsigned OpNo); + SDValue SoftenFloatOp_Unary(SDNode *N, RTLIB::Libcall LC); SDValue SoftenFloatOp_BITCAST(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); From ded249049429a26d3748926c04bd7169f0170714 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Tue, 26 Nov 2019 13:13:47 -0500 Subject: [PATCH 084/591] Workaround for EvalInfo ctor for MSVC 2017 Current EvalInfo ctor causes EnableNewConstInterp to be true even though it is supposed to be false on MSVC 2017. This is because a virtual function getLangOpts() is called in member initializer lists, whereas on MSVC member ctors are called before function virtual function pointers are initialized. This patch fixes that. Differential Revision: https://reviews.llvm.org/D70729 --- clang/lib/AST/ExprConstant.cpp | 6 +++--- clang/test/Sema/eval-info.c | 9 +++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) create mode 100644 clang/test/Sema/eval-info.c diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 79659261388b4..eec9bbdaef806 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -921,10 +921,10 @@ namespace { EvalInfo(const ASTContext &C, Expr::EvalStatus &S, EvaluationMode Mode) : Ctx(const_cast(C)), EvalStatus(S), CurrentCall(nullptr), CallStackDepth(0), NextCallIndex(1), - StepsLeft(getLangOpts().ConstexprStepLimit), - ForceNewConstInterp(getLangOpts().ForceNewConstInterp), + StepsLeft(C.getLangOpts().ConstexprStepLimit), + ForceNewConstInterp(C.getLangOpts().ForceNewConstInterp), EnableNewConstInterp(ForceNewConstInterp || - getLangOpts().EnableNewConstInterp), + C.getLangOpts().EnableNewConstInterp), BottomFrame(*this, SourceLocation(), nullptr, nullptr, nullptr), EvaluatingDecl((const ValueDecl *)nullptr), EvaluatingDeclValue(nullptr), HasActiveDiagnostic(false), diff --git a/clang/test/Sema/eval-info.c b/clang/test/Sema/eval-info.c new file mode 100644 
index 0000000000000..7f4de4b908207 --- /dev/null +++ b/clang/test/Sema/eval-info.c @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 %s -fsyntax-only -triple x86_64-unknown-windows-msvc -verify + +// expected-no-diagnostics + +// Make sure the new constant interpolator is not enabled unintentionally +// to cause assertion. +typedef enum x { + a = 1, +} x; From f7aeca45b23c09522afd9f88c6782b2a2acd0783 Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Tue, 26 Nov 2019 14:56:31 -0800 Subject: [PATCH 085/591] [Fuchsia] Don't fail for unknown architectures When selecting the set of default sanitizers, don't fail for unknown architectures. This may be the case e.g. with x86_64-unknown-fuchsia -m32 target that's used to build the bootloader. Differential Revision: https://reviews.llvm.org/D70747 --- clang/lib/Driver/ToolChains/Fuchsia.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Fuchsia.cpp b/clang/lib/Driver/ToolChains/Fuchsia.cpp index 9bea0b15c8739..4c5d4003f1442 100644 --- a/clang/lib/Driver/ToolChains/Fuchsia.cpp +++ b/clang/lib/Driver/ToolChains/Fuchsia.cpp @@ -351,10 +351,9 @@ SanitizerMask Fuchsia::getDefaultSanitizers() const { case llvm::Triple::x86_64: Res |= SanitizerKind::SafeStack; break; - case llvm::Triple::riscv64: - break; default: - llvm_unreachable("invalid architecture"); + // TODO: Enable SafeStack on RISC-V once tested. 
+ break; } return Res; } From 82b4dc0256d4ef27692308392766b2970249d9cf Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 26 Nov 2019 19:52:15 -0800 Subject: [PATCH 086/591] XFAIL a test on Windows http://45.33.8.238/win/3052/step_6.txt C:\src\llvm-project\clang\test\Preprocessor\file_test.c:9:11: error: CHECK: expected string not found in input // CHECK: filename: "/UNLIKELY_PATH/empty{{/|\\\\}}file_test.c" ^ :1:1: note: scanning from here ^ :1:28: note: possible intended match here ^ --- clang/test/Preprocessor/file_test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/test/Preprocessor/file_test.c b/clang/test/Preprocessor/file_test.c index 09ae48f48e097..3788db6eb090e 100644 --- a/clang/test/Preprocessor/file_test.c +++ b/clang/test/Preprocessor/file_test.c @@ -1,3 +1,4 @@ +// XFAIL: system-windows // RUN: %clang -E -ffile-prefix-map=%p=/UNLIKELY_PATH/empty -c -o - %s | FileCheck %s // RUN: %clang -E -fmacro-prefix-map=%p=/UNLIKELY_PATH/empty -c -o - %s | FileCheck %s // RUN: %clang -E -fmacro-prefix-map=%p=/UNLIKELY_PATH=empty -c -o - %s | FileCheck %s -check-prefix CHECK-EVIL From fd39b1bb20cec32c310ae9b6f1b4603c17a5f832 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Tue, 26 Nov 2019 20:28:52 -0800 Subject: [PATCH 087/591] Revert "Revert "As a follow-up to my initial mail to llvm-dev here's a first pass at the O1 described there."" This reapplies: 8ff85ed905a7306977d07a5cd67ab4d5a56fafb4 Original commit message: As a follow-up to my initial mail to llvm-dev here's a first pass at the O1 described there. This change doesn't include any change to move from selection dag to fast isel and that will come with other numbers that should help inform that decision. There also haven't been any real debuggability studies with this pipeline yet, this is just the initial start done so that people could see it and we could start tweaking after. 
Test updates: Outside of the newpm tests most of the updates are coming from either optimization passes not run anymore (and without a compelling argument at the moment) that were largely used for canonicalization in clang. Original post: http://lists.llvm.org/pipermail/llvm-dev/2019-April/131494.html Tags: #llvm Differential Revision: https://reviews.llvm.org/D65410 This reverts commit c9ddb02659e3ece7a0d9d6b4dac7ceea4ae46e6d. --- .../2008-07-30-implicit-initialization.c | 2 +- clang/test/CodeGen/arm-fp16-arguments.c | 6 +- clang/test/CodeGen/arm-vfp16-arguments2.cpp | 6 +- clang/test/CodeGen/atomic-ops-libcall.c | 34 +-- clang/test/CodeGenCXX/atomicinit.cpp | 2 +- clang/test/CodeGenCXX/auto-var-init.cpp | 9 +- clang/test/CodeGenCXX/discard-name-values.cpp | 4 +- .../CodeGenCXX/microsoft-abi-dynamic-cast.cpp | 18 +- .../test/CodeGenCXX/microsoft-abi-typeid.cpp | 8 +- clang/test/CodeGenCXX/nrvo.cpp | 18 +- clang/test/CodeGenCXX/stack-reuse.cpp | 2 +- clang/test/CodeGenCXX/wasm-args-returns.cpp | 12 +- clang/test/CodeGenObjCXX/arc-blocks.mm | 6 +- clang/test/CodeGenObjCXX/nrvo.mm | 4 +- ...e_to_dependency_directives_invalid_error.c | 32 +-- .../test/PCH/no-escaping-block-tail-calls.cpp | 4 +- .../ambiguous_tail_call_seq1/Makefile | 2 +- .../ambiguous_tail_call_seq2/Makefile | 2 +- .../disambiguate_call_site/Makefile | 2 +- .../Makefile | 2 +- .../disambiguate_tail_call_seq/Makefile | 2 +- .../inlining_and_tail_calls/Makefile | 2 +- .../tail_call_frames/sbapi_support/Makefile | 2 +- .../thread_step_out_message/Makefile | 2 +- .../thread_step_out_or_return/Makefile | 2 +- .../unambiguous_sequence/Makefile | 2 +- llvm/include/llvm/Passes/PassBuilder.h | 10 +- llvm/lib/Passes/PassBuilder.cpp | 48 ++-- .../lib/Transforms/IPO/PassManagerBuilder.cpp | 46 +-- llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll | 268 +++++++++--------- llvm/test/Feature/optnone-opt.ll | 6 - llvm/test/Other/new-pm-defaults.ll | 78 +++-- llvm/test/Other/new-pm-thinlto-defaults.ll | 46 +-- 
llvm/test/Transforms/MemCpyOpt/lifetime.ll | 2 +- .../PhaseOrdering/simplifycfg-options.ll | 8 +- .../PhaseOrdering/two-shifts-by-sext.ll | 4 +- 36 files changed, 353 insertions(+), 350 deletions(-) diff --git a/clang/test/CodeGen/2008-07-30-implicit-initialization.c b/clang/test/CodeGen/2008-07-30-implicit-initialization.c index e77c70a140f9d..f2621f4560ec9 100644 --- a/clang/test/CodeGen/2008-07-30-implicit-initialization.c +++ b/clang/test/CodeGen/2008-07-30-implicit-initialization.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple i386-unknown-unknown -O1 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple i386-unknown-unknown -O2 -emit-llvm -o - %s | FileCheck %s // CHECK-LABEL: define i32 @f0() // CHECK: ret i32 0 // CHECK-LABEL: define i32 @f1() diff --git a/clang/test/CodeGen/arm-fp16-arguments.c b/clang/test/CodeGen/arm-fp16-arguments.c index d739f4b9c66a5..34dc1a1cbf6aa 100644 --- a/clang/test/CodeGen/arm-fp16-arguments.c +++ b/clang/test/CodeGen/arm-fp16-arguments.c @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fallow-half-arguments-and-returns -emit-llvm -o - -O1 %s | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT -// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi hard -fallow-half-arguments-and-returns -emit-llvm -o - -O1 %s | FileCheck %s --check-prefix=CHECK --check-prefix=HARD -// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fnative-half-arguments-and-returns -emit-llvm -o - -O1 %s | FileCheck %s --check-prefix=NATIVE +// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fallow-half-arguments-and-returns -emit-llvm -o - -O2 %s | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT +// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi hard -fallow-half-arguments-and-returns -emit-llvm -o - -O2 %s | FileCheck %s --check-prefix=CHECK --check-prefix=HARD +// RUN: %clang_cc1 -triple 
armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fnative-half-arguments-and-returns -emit-llvm -o - -O2 %s | FileCheck %s --check-prefix=NATIVE __fp16 g; diff --git a/clang/test/CodeGen/arm-vfp16-arguments2.cpp b/clang/test/CodeGen/arm-vfp16-arguments2.cpp index 4f75971d83277..e436a5ecd6abd 100644 --- a/clang/test/CodeGen/arm-vfp16-arguments2.cpp +++ b/clang/test/CodeGen/arm-vfp16-arguments2.cpp @@ -1,12 +1,12 @@ // RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs \ -// RUN: -mfloat-abi soft -target-feature +neon -emit-llvm -o - -O1 %s \ +// RUN: -mfloat-abi soft -target-feature +neon -emit-llvm -o - -O2 %s \ // RUN: | FileCheck %s --check-prefix=CHECK-SOFT // RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs \ -// RUN: -mfloat-abi hard -target-feature +neon -emit-llvm -o - -O1 %s \ +// RUN: -mfloat-abi hard -target-feature +neon -emit-llvm -o - -O2 %s \ // RUN: | FileCheck %s --check-prefix=CHECK-HARD // RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs \ // RUN: -mfloat-abi hard -target-feature +neon -target-feature +fullfp16 \ -// RUN: -emit-llvm -o - -O1 %s \ +// RUN: -emit-llvm -o - -O2 %s \ // RUN: | FileCheck %s --check-prefix=CHECK-FULL typedef float float32_t; diff --git a/clang/test/CodeGen/atomic-ops-libcall.c b/clang/test/CodeGen/atomic-ops-libcall.c index c673b07f8ed83..ca79688c8a0c4 100644 --- a/clang/test/CodeGen/atomic-ops-libcall.c +++ b/clang/test/CodeGen/atomic-ops-libcall.c @@ -10,109 +10,109 @@ enum memory_order { int *test_c11_atomic_fetch_add_int_ptr(_Atomic(int *) *p) { // CHECK: test_c11_atomic_fetch_add_int_ptr - // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 12, i32 5) + // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 12, i32 5) return __c11_atomic_fetch_add(p, 3, memory_order_seq_cst); } int *test_c11_atomic_fetch_sub_int_ptr(_Atomic(int *) *p) { // CHECK: test_c11_atomic_fetch_sub_int_ptr - // CHECK: {{%[^ ]*}} = tail call i32 
@__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 20, i32 5) + // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 20, i32 5) return __c11_atomic_fetch_sub(p, 5, memory_order_seq_cst); } int test_c11_atomic_fetch_add_int(_Atomic(int) *p) { // CHECK: test_c11_atomic_fetch_add_int - // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 3, i32 5) + // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 3, i32 5) return __c11_atomic_fetch_add(p, 3, memory_order_seq_cst); } int test_c11_atomic_fetch_sub_int(_Atomic(int) *p) { // CHECK: test_c11_atomic_fetch_sub_int - // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 5, i32 5) + // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 5, i32 5) return __c11_atomic_fetch_sub(p, 5, memory_order_seq_cst); } int *fp2a(int **p) { // CHECK: @fp2a - // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 4, i32 0) + // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 4, i32 0) // Note, the GNU builtins do not multiply by sizeof(T)! 
return __atomic_fetch_sub(p, 4, memory_order_relaxed); } int test_atomic_fetch_add(int *p) { // CHECK: test_atomic_fetch_add - // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 55, i32 5) return __atomic_fetch_add(p, 55, memory_order_seq_cst); } int test_atomic_fetch_sub(int *p) { // CHECK: test_atomic_fetch_sub - // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 55, i32 5) return __atomic_fetch_sub(p, 55, memory_order_seq_cst); } int test_atomic_fetch_and(int *p) { // CHECK: test_atomic_fetch_and - // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_and_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_and_4(i8* {{%[0-9]+}}, i32 55, i32 5) return __atomic_fetch_and(p, 55, memory_order_seq_cst); } int test_atomic_fetch_or(int *p) { // CHECK: test_atomic_fetch_or - // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_or_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_or_4(i8* {{%[0-9]+}}, i32 55, i32 5) return __atomic_fetch_or(p, 55, memory_order_seq_cst); } int test_atomic_fetch_xor(int *p) { // CHECK: test_atomic_fetch_xor - // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_xor_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_xor_4(i8* {{%[0-9]+}}, i32 55, i32 5) return __atomic_fetch_xor(p, 55, memory_order_seq_cst); } int test_atomic_fetch_nand(int *p) { // CHECK: test_atomic_fetch_nand - // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_nand_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: {{%[^ ]*}} = call i32 @__atomic_fetch_nand_4(i8* {{%[0-9]+}}, i32 55, i32 5) return __atomic_fetch_nand(p, 55, memory_order_seq_cst); } int test_atomic_add_fetch(int *p) { // CHECK: test_atomic_add_fetch - // CHECK: [[CALL:%[^ ]*]] = 
tail call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: [[CALL:%[^ ]*]] = call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 55, i32 5) // CHECK: {{%[^ ]*}} = add i32 [[CALL]], 55 return __atomic_add_fetch(p, 55, memory_order_seq_cst); } int test_atomic_sub_fetch(int *p) { // CHECK: test_atomic_sub_fetch - // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: [[CALL:%[^ ]*]] = call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 55, i32 5) // CHECK: {{%[^ ]*}} = add i32 [[CALL]], -55 return __atomic_sub_fetch(p, 55, memory_order_seq_cst); } int test_atomic_and_fetch(int *p) { // CHECK: test_atomic_and_fetch - // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_and_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: [[CALL:%[^ ]*]] = call i32 @__atomic_fetch_and_4(i8* {{%[0-9]+}}, i32 55, i32 5) // CHECK: {{%[^ ]*}} = and i32 [[CALL]], 55 return __atomic_and_fetch(p, 55, memory_order_seq_cst); } int test_atomic_or_fetch(int *p) { // CHECK: test_atomic_or_fetch - // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_or_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: [[CALL:%[^ ]*]] = call i32 @__atomic_fetch_or_4(i8* {{%[0-9]+}}, i32 55, i32 5) // CHECK: {{%[^ ]*}} = or i32 [[CALL]], 55 return __atomic_or_fetch(p, 55, memory_order_seq_cst); } int test_atomic_xor_fetch(int *p) { // CHECK: test_atomic_xor_fetch - // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_xor_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: [[CALL:%[^ ]*]] = call i32 @__atomic_fetch_xor_4(i8* {{%[0-9]+}}, i32 55, i32 5) // CHECK: {{%[^ ]*}} = xor i32 [[CALL]], 55 return __atomic_xor_fetch(p, 55, memory_order_seq_cst); } int test_atomic_nand_fetch(int *p) { // CHECK: test_atomic_nand_fetch - // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_nand_4(i8* {{%[0-9]+}}, i32 55, i32 5) + // CHECK: [[CALL:%[^ ]*]] = call i32 @__atomic_fetch_nand_4(i8* {{%[0-9]+}}, i32 55, i32 5) // FIXME: We should not be 
checking optimized IR. It changes independently of clang. // FIXME-CHECK: [[AND:%[^ ]*]] = and i32 [[CALL]], 55 // FIXME-CHECK: {{%[^ ]*}} = xor i32 [[AND]], -1 diff --git a/clang/test/CodeGenCXX/atomicinit.cpp b/clang/test/CodeGenCXX/atomicinit.cpp index 85ec74593fe04..657ade588fd5d 100644 --- a/clang/test/CodeGenCXX/atomicinit.cpp +++ b/clang/test/CodeGenCXX/atomicinit.cpp @@ -31,7 +31,7 @@ _Atomic(B) b; // CHECK-LABEL: define void @_Z11atomic_initR1Ai void atomic_init(A& a, int i) { // CHECK-NOT: atomic - // CHECK: tail call void @_ZN1BC1Ei + // CHECK: call void @_ZN1BC1Ei __c11_atomic_init(&b, B(i)); // CHECK-NEXT: ret void } diff --git a/clang/test/CodeGenCXX/auto-var-init.cpp b/clang/test/CodeGenCXX/auto-var-init.cpp index a2cb2c8352b69..9cd71bdfd1a7d 100644 --- a/clang/test/CodeGenCXX/auto-var-init.cpp +++ b/clang/test/CodeGenCXX/auto-var-init.cpp @@ -645,7 +645,7 @@ TEST_UNINIT(smallpartinit, smallpartinit); // ZERO-LABEL: @test_smallpartinit_uninit() // ZERO-O0: call void @llvm.memset{{.*}}, i8 0, // ZERO-O1-LEGACY: store i16 0, i16* %uninit, align 2 -// ZERO-O1-NEWPM: store i16 42, i16* %uninit, align 2 +// ZERO-O1-NEWPM: store i16 0, i16* %uninit, align 2 TEST_BRACES(smallpartinit, smallpartinit); // CHECK-LABEL: @test_smallpartinit_braces() @@ -718,7 +718,7 @@ TEST_UNINIT(paddednullinit, paddednullinit); // PATTERN-LABEL: @test_paddednullinit_uninit() // PATTERN-O0: call void @llvm.memcpy{{.*}} @__const.test_paddednullinit_uninit.uninit // PATTERN-O1-LEGACY: store i64 [[I64]], i64* %uninit, align 8 -// PATTERN-O1-NEWPM: store i64 2863311360, i64* %uninit, align 8 +// PATTERN-O1-NEWPM: store i64 [[I64]], i64* %uninit, align 8 // ZERO-LABEL: @test_paddednullinit_uninit() // ZERO-O0: call void @llvm.memset{{.*}}, i8 0, // ZERO-O1: store i64 0, i64* %uninit, align 8 @@ -1344,10 +1344,7 @@ TEST_UNINIT(virtualderived, virtualderived); // ZERO-LABEL: @test_virtualderived_uninit() // ZERO-O0: call void @llvm.memset{{.*}}, i8 0, // ZERO-O1-LEGACY: call void 
@llvm.memset{{.*}}, i8 0, -// ZERO-O1-NEWPM: [[FIELD1:%.*]] = getelementptr inbounds %struct.virtualderived, %struct.virtualderived* %uninit, i64 0, i32 1, i32 0, i32 0 -// ZERO-O1-NEWPM: [[FIELD0:%.*]] = getelementptr inbounds %struct.virtualderived, %struct.virtualderived* %uninit, i64 0, i32 0, i32 0 -// ZERO-O1-NEWPM: store i32 (...)** bitcast (i8** getelementptr inbounds ({ [7 x i8*], [5 x i8*] }, { [7 x i8*], [5 x i8*] }* @_ZTV14virtualderived, i64 0, inrange i32 0, i64 5) to i32 (...)**), i32 (...)*** [[FIELD0]], align 8 -// ZERO-O1-NEWPM: store i32 (...)** bitcast (i8** getelementptr inbounds ({ [7 x i8*], [5 x i8*] }, { [7 x i8*], [5 x i8*] }* @_ZTV14virtualderived, i64 0, inrange i32 1, i64 3) to i32 (...)**), i32 (...)*** [[FIELD1]], align 8 +// ZERO-O1-NEWPM: call void @llvm.memset{{.*}}, i8 0, TEST_BRACES(virtualderived, virtualderived); // CHECK-LABEL: @test_virtualderived_braces() diff --git a/clang/test/CodeGenCXX/discard-name-values.cpp b/clang/test/CodeGenCXX/discard-name-values.cpp index aa30dae7501bd..91328a4ddade3 100644 --- a/clang/test/CodeGenCXX/discard-name-values.cpp +++ b/clang/test/CodeGenCXX/discard-name-values.cpp @@ -11,11 +11,11 @@ bool test(bool pred) { if (pred) { // DISCARDVALUE: 2: - // DISCARDVALUE-NEXT: tail call void @branch() + // DISCARDVALUE-NEXT: call void @branch() // DISCARDVALUE-NEXT: br label %3 // CHECK: if.then: - // CHECK-NEXT: tail call void @branch() + // CHECK-NEXT: call void @branch() // CHECK-NEXT: br label %if.end branch(); } diff --git a/clang/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp b/clang/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp index c99df0e88b420..a07114dce7d07 100644 --- a/clang/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp +++ b/clang/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp @@ -13,7 +13,7 @@ T* test0() { return dynamic_cast((B*)0); } T* test1(V* x) { return &dynamic_cast(*x); } // CHECK-LABEL: define dso_local %struct.T* @"?test1@@YAPAUT@@PAUV@@@Z"(%struct.V* %x) // CHECK: 
[[CAST:%.*]] = bitcast %struct.V* %x to i8* -// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* [[CAST]], i32 0, i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUV@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 1) +// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTDynamicCast(i8* [[CAST]], i32 0, i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUV@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 1) // CHECK-NEXT: [[RET:%.*]] = bitcast i8* [[CALL]] to %struct.T* // CHECK-NEXT: ret %struct.T* [[RET]] @@ -25,7 +25,7 @@ T* test2(A* x) { return &dynamic_cast(*x); } // CHECK-NEXT: [[VBOFFP:%.*]] = getelementptr inbounds i32, i32* [[VBTBL]], i32 1 // CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4 // CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[CAST]], i32 [[VBOFFS]] -// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* [[ADJ]], i32 [[VBOFFS]], i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUA@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 1) +// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTDynamicCast(i8* [[ADJ]], i32 [[VBOFFS]], i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUA@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 1) // CHECK-NEXT: [[RET:%.*]] = bitcast i8* [[CALL]] to %struct.T* // CHECK-NEXT: ret %struct.T* [[RET]] @@ -39,14 +39,14 @@ T* test3(B* x) { return &dynamic_cast(*x); } // CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4 // CHECK-NEXT: [[DELTA:%.*]] = add nsw i32 [[VBOFFS]], 4 // CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[VOIDP]], i32 [[DELTA]] -// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* [[ADJ]], i32 [[DELTA]], i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUB@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 1) +// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTDynamicCast(i8* [[ADJ]], i32 [[DELTA]], i8* bitcast 
(%rtti.TypeDescriptor7* @"??_R0?AUB@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 1) // CHECK-NEXT: [[RET:%.*]] = bitcast i8* [[CALL]] to %struct.T* // CHECK-NEXT: ret %struct.T* [[RET]] T* test4(V* x) { return dynamic_cast(x); } // CHECK-LABEL: define dso_local %struct.T* @"?test4@@YAPAUT@@PAUV@@@Z"(%struct.V* %x) // CHECK: [[CAST:%.*]] = bitcast %struct.V* %x to i8* -// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* [[CAST]], i32 0, i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUV@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0) +// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTDynamicCast(i8* [[CAST]], i32 0, i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUV@@@8" to i8*), i8* bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0) // CHECK-NEXT: [[RET:%.*]] = bitcast i8* [[CALL]] to %struct.T* // CHECK-NEXT: ret %struct.T* [[RET]] @@ -60,7 +60,7 @@ T* test5(A* x) { return dynamic_cast(x); } // CHECK-NEXT: [[VBOFFP:%.*]] = getelementptr inbounds i32, i32* [[VBTBL]], i32 1 // CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4 // CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[VOIDP]], i32 [[VBOFFS]] -// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* nonnull [[ADJ]], i32 [[VBOFFS]], i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUA@@@8" to i8*), i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0) +// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTDynamicCast(i8* nonnull [[ADJ]], i32 [[VBOFFS]], i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUA@@@8" to i8*), i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0) // CHECK-NEXT: [[RES:%.*]] = bitcast i8* [[CALL]] to %struct.T* // CHECK-NEXT: br label // CHECK: [[RET:%.*]] = phi %struct.T* @@ -78,7 +78,7 @@ T* test6(B* x) { return dynamic_cast(x); } // CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4 // CHECK-NEXT: [[DELTA:%.*]] = add nsw 
i32 [[VBOFFS]], 4 // CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[CAST]], i32 [[DELTA]] -// CHECK-NEXT: [[CALL:%.*]] = tail call i8* @__RTDynamicCast(i8* [[ADJ]], i32 [[DELTA]], i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUB@@@8" to i8*), i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0) +// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTDynamicCast(i8* [[ADJ]], i32 [[DELTA]], i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUB@@@8" to i8*), i8* {{.*}}bitcast (%rtti.TypeDescriptor7* @"??_R0?AUT@@@8" to i8*), i32 0) // CHECK-NEXT: [[RES:%.*]] = bitcast i8* [[CALL]] to %struct.T* // CHECK-NEXT: br label // CHECK: [[RET:%.*]] = phi %struct.T* @@ -87,7 +87,7 @@ T* test6(B* x) { return dynamic_cast(x); } void* test7(V* x) { return dynamic_cast(x); } // CHECK-LABEL: define dso_local i8* @"?test7@@YAPAXPAUV@@@Z"(%struct.V* %x) // CHECK: [[CAST:%.*]] = bitcast %struct.V* %x to i8* -// CHECK-NEXT: [[RET:%.*]] = tail call i8* @__RTCastToVoid(i8* [[CAST]]) +// CHECK-NEXT: [[RET:%.*]] = call i8* @__RTCastToVoid(i8* [[CAST]]) // CHECK-NEXT: ret i8* [[RET]] void* test8(A* x) { return dynamic_cast(x); } @@ -100,7 +100,7 @@ void* test8(A* x) { return dynamic_cast(x); } // CHECK-NEXT: [[VBOFFP:%.*]] = getelementptr inbounds i32, i32* [[VBTBL]], i32 1 // CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4 // CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[VOIDP]], i32 [[VBOFFS]] -// CHECK-NEXT: [[RES:%.*]] = tail call i8* @__RTCastToVoid(i8* nonnull [[ADJ]]) +// CHECK-NEXT: [[RES:%.*]] = call i8* @__RTCastToVoid(i8* nonnull [[ADJ]]) // CHECK-NEXT: br label // CHECK: [[RET:%.*]] = phi i8* // CHECK-NEXT: ret i8* [[RET]] @@ -117,7 +117,7 @@ void* test9(B* x) { return dynamic_cast(x); } // CHECK-NEXT: [[VBOFFS:%.*]] = load i32, i32* [[VBOFFP]], align 4 // CHECK-NEXT: [[DELTA:%.*]] = add nsw i32 [[VBOFFS]], 4 // CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[CAST]], i32 [[DELTA]] -// CHECK-NEXT: [[CALL:%.*]] = 
tail call i8* @__RTCastToVoid(i8* [[ADJ]]) +// CHECK-NEXT: [[CALL:%.*]] = call i8* @__RTCastToVoid(i8* [[ADJ]]) // CHECK-NEXT: br label // CHECK: [[RET:%.*]] = phi i8* // CHECK-NEXT: ret i8* [[RET]] diff --git a/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp b/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp index 848e280cd9fe0..f3bd7e6fd6c80 100644 --- a/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp +++ b/clang/test/CodeGenCXX/microsoft-abi-typeid.cpp @@ -25,10 +25,10 @@ const std::type_info* test2_typeid() { return &typeid(&a); } const std::type_info* test3_typeid() { return &typeid(*fn()); } // CHECK-LABEL: define dso_local %struct.type_info* @"?test3_typeid@@YAPBUtype_info@@XZ"() -// CHECK: [[CALL:%.*]] = tail call %struct.A* @"?fn@@YAPAUA@@XZ"() +// CHECK: [[CALL:%.*]] = call %struct.A* @"?fn@@YAPAUA@@XZ"() // CHECK-NEXT: [[CMP:%.*]] = icmp eq %struct.A* [[CALL]], null // CHECK-NEXT: br i1 [[CMP]] -// CHECK: tail call i8* @__RTtypeid(i8* null) +// CHECK: call i8* @__RTtypeid(i8* null) // CHECK-NEXT: unreachable // CHECK: [[THIS:%.*]] = bitcast %struct.A* [[CALL]] to i8* // CHECK-NEXT: [[VBTBLP:%.*]] = getelementptr %struct.A, %struct.A* [[CALL]], i32 0, i32 0 @@ -36,7 +36,7 @@ const std::type_info* test3_typeid() { return &typeid(*fn()); } // CHECK-NEXT: [[VBSLOT:%.*]] = getelementptr inbounds i32, i32* [[VBTBL]], i32 1 // CHECK-NEXT: [[VBASE_OFFS:%.*]] = load i32, i32* [[VBSLOT]], align 4 // CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, i8* [[THIS]], i32 [[VBASE_OFFS]] -// CHECK-NEXT: [[RT:%.*]] = tail call i8* @__RTtypeid(i8* nonnull [[ADJ]]) +// CHECK-NEXT: [[RT:%.*]] = call i8* @__RTtypeid(i8* nonnull [[ADJ]]) // CHECK-NEXT: [[RET:%.*]] = bitcast i8* [[RT]] to %struct.type_info* // CHECK-NEXT: ret %struct.type_info* [[RET]] @@ -46,7 +46,7 @@ const std::type_info* test4_typeid() { return &typeid(b); } const std::type_info* test5_typeid() { return &typeid(v); } // CHECK: define dso_local %struct.type_info* @"?test5_typeid@@YAPBUtype_info@@XZ"() -// 
CHECK: [[RT:%.*]] = tail call i8* @__RTtypeid(i8* bitcast (%struct.V* @"?v@@3UV@@A" to i8*)) +// CHECK: [[RT:%.*]] = call i8* @__RTtypeid(i8* bitcast (%struct.V* @"?v@@3UV@@A" to i8*)) // CHECK-NEXT: [[RET:%.*]] = bitcast i8* [[RT]] to %struct.type_info* // CHECK-NEXT: ret %struct.type_info* [[RET]] diff --git a/clang/test/CodeGenCXX/nrvo.cpp b/clang/test/CodeGenCXX/nrvo.cpp index aab26890ea988..74a5af765d130 100644 --- a/clang/test/CodeGenCXX/nrvo.cpp +++ b/clang/test/CodeGenCXX/nrvo.cpp @@ -33,13 +33,13 @@ X test0() { // CHECK-LABEL: define void @_Z5test1b( // CHECK-EH-LABEL: define void @_Z5test1b( X test1(bool B) { - // CHECK: tail call {{.*}} @_ZN1XC1Ev + // CHECK: call {{.*}} @_ZN1XC1Ev // CHECK-NEXT: ret void X x; if (B) return (x); return x; - // CHECK-EH: tail call {{.*}} @_ZN1XC1Ev + // CHECK-EH: call {{.*}} @_ZN1XC1Ev // CHECK-EH-NEXT: ret void } @@ -130,7 +130,7 @@ X test2(bool B) { // CHECK-LABEL: define void @_Z5test3b X test3(bool B) { - // CHECK: tail call {{.*}} @_ZN1XC1Ev + // CHECK: call {{.*}} @_ZN1XC1Ev // CHECK-NOT: call {{.*}} @_ZN1XC1ERKS_ // CHECK: call {{.*}} @_ZN1XC1Ev // CHECK: call {{.*}} @_ZN1XC1ERKS_ @@ -148,14 +148,14 @@ extern "C" void exit(int) throw(); // CHECK-LABEL: define void @_Z5test4b X test4(bool B) { { - // CHECK: tail call {{.*}} @_ZN1XC1Ev + // CHECK: call {{.*}} @_ZN1XC1Ev X x; // CHECK: br i1 if (B) return x; } - // CHECK: tail call {{.*}} @_ZN1XD1Ev - // CHECK: tail call void @exit(i32 1) + // CHECK: call {{.*}} @_ZN1XD1Ev + // CHECK: call void @exit(i32 1) exit(1); } @@ -191,7 +191,7 @@ X test6() { // CHECK-LABEL: define void @_Z5test7b X test7(bool b) { - // CHECK: tail call {{.*}} @_ZN1XC1Ev + // CHECK: call {{.*}} @_ZN1XC1Ev // CHECK-NEXT: ret if (b) { X x; @@ -202,7 +202,7 @@ X test7(bool b) { // CHECK-LABEL: define void @_Z5test8b X test8(bool b) { - // CHECK: tail call {{.*}} @_ZN1XC1Ev + // CHECK: call {{.*}} @_ZN1XC1Ev // CHECK-NEXT: ret if (b) { X x; @@ -218,6 +218,6 @@ Y test9() { } // CHECK-LABEL: define 
linkonce_odr void @_ZN1YIiE1fEv -// CHECK: tail call {{.*}} @_ZN1YIiEC1Ev +// CHECK: call {{.*}} @_ZN1YIiEC1Ev // CHECK-EH-03: attributes [[NR_NUW]] = { noreturn nounwind } diff --git a/clang/test/CodeGenCXX/stack-reuse.cpp b/clang/test/CodeGenCXX/stack-reuse.cpp index 8325604391ae2..35dcb5b349c3e 100644 --- a/clang/test/CodeGenCXX/stack-reuse.cpp +++ b/clang/test/CodeGenCXX/stack-reuse.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple armv7-unknown-linux-gnueabihf %s -o - -emit-llvm -O1 | FileCheck %s +// RUN: %clang_cc1 -triple armv7-unknown-linux-gnueabihf %s -o - -emit-llvm -O2 | FileCheck %s // Stack should be reused when possible, no need to allocate two separate slots // if they have disjoint lifetime. diff --git a/clang/test/CodeGenCXX/wasm-args-returns.cpp b/clang/test/CodeGenCXX/wasm-args-returns.cpp index 5718223f9f740..c547eb85390da 100644 --- a/clang/test/CodeGenCXX/wasm-args-returns.cpp +++ b/clang/test/CodeGenCXX/wasm-args-returns.cpp @@ -19,8 +19,8 @@ test(one_field); // CHECK: define double @_Z7forward9one_field(double returned %{{.*}}) // // CHECK: define void @_Z14test_one_fieldv() -// CHECK: %[[call:.*]] = tail call double @_Z13def_one_fieldv() -// CHECK: tail call void @_Z3use9one_field(double %[[call]]) +// CHECK: %[[call:.*]] = call double @_Z13def_one_fieldv() +// CHECK: call void @_Z3use9one_field(double %[[call]]) // CHECK: ret void // // CHECK: declare void @_Z3use9one_field(double) @@ -82,8 +82,8 @@ test(empty); // CHECK: define void @_Z7forward5empty() // // CHECK: define void @_Z10test_emptyv() -// CHECK: tail call void @_Z9def_emptyv() -// CHECK: tail call void @_Z3use5empty() +// CHECK: call void @_Z9def_emptyv() +// CHECK: call void @_Z3use5empty() // CHECK: ret void // // CHECK: declare void @_Z3use5empty() @@ -96,8 +96,8 @@ test(one_bitfield); // CHECK: define i32 @_Z7forward12one_bitfield(i32 returned %{{.*}}) // // CHECK: define void @_Z17test_one_bitfieldv() -// CHECK: %[[call:.*]] = tail call i32 @_Z16def_one_bitfieldv() -// 
CHECK: tail call void @_Z3use12one_bitfield(i32 %[[call]]) +// CHECK: %[[call:.*]] = call i32 @_Z16def_one_bitfieldv() +// CHECK: call void @_Z3use12one_bitfield(i32 %[[call]]) // CHECK: ret void // // CHECK: declare void @_Z3use12one_bitfield(i32) diff --git a/clang/test/CodeGenObjCXX/arc-blocks.mm b/clang/test/CodeGenObjCXX/arc-blocks.mm index 24697cf1bd377..d29491ed077ea 100644 --- a/clang/test/CodeGenObjCXX/arc-blocks.mm +++ b/clang/test/CodeGenObjCXX/arc-blocks.mm @@ -122,7 +122,7 @@ void foo() { // CHECK: call void @__clang_call_terminate( // CHECK-O1-LABEL: define linkonce_odr hidden void @__copy_helper_block_ea8_32s40r48w56c15_ZTSN5test12S0E60c15_ZTSN5test12S0E( -// CHECK-O1: tail call void @llvm.objc.release({{.*}}) {{.*}} !clang.imprecise_release +// CHECK-O1: call void @llvm.objc.release({{.*}}) {{.*}} !clang.imprecise_release // CHECK-NOEXCP: define linkonce_odr hidden void @__copy_helper_block_8_32s40r48w56c15_ZTSN5test12S0E60c15_ZTSN5test12S0E( // CHECK: define linkonce_odr hidden void @__destroy_helper_block_ea8_32s40r48w56c15_ZTSN5test12S0E60c15_ZTSN5test12S0E( @@ -170,8 +170,8 @@ void foo() { // CHECK: call void @__clang_call_terminate( // CHECK-O1-LABEL: define linkonce_odr hidden void @__destroy_helper_block_ea8_32s40r48w56c15_ZTSN5test12S0E60c15_ZTSN5test12S0E( -// CHECK-O1: tail call void @llvm.objc.release({{.*}}) {{.*}} !clang.imprecise_release -// CHECK-O1: tail call void @llvm.objc.release({{.*}}) {{.*}} !clang.imprecise_release +// CHECK-O1: call void @llvm.objc.release({{.*}}) {{.*}} !clang.imprecise_release +// CHECK-O1: call void @llvm.objc.release({{.*}}) {{.*}} !clang.imprecise_release // CHECK-NOEXCP: define linkonce_odr hidden void @__destroy_helper_block_8_32s40r48w56c15_ZTSN5test12S0E60c15_ZTSN5test12S0E( namespace { diff --git a/clang/test/CodeGenObjCXX/nrvo.mm b/clang/test/CodeGenObjCXX/nrvo.mm index 1ad5f79ad12ea..a02b38b820a3e 100644 --- a/clang/test/CodeGenObjCXX/nrvo.mm +++ b/clang/test/CodeGenObjCXX/nrvo.mm @@ -14,7 +14,7 
@@ @implementation NRVO // CHECK: define internal void @"\01-[NRVO getNRVO]" - (X)getNRVO { X x; - // CHECK: tail call void @_ZN1XC1Ev + // CHECK: call void @_ZN1XC1Ev // CHECK-NEXT: ret void return x; } @@ -24,7 +24,7 @@ X blocksNRVO() { return ^{ // CHECK-LABEL: define internal void @___Z10blocksNRVOv_block_invoke X x; - // CHECK: tail call void @_ZN1XC1Ev + // CHECK: call void @_ZN1XC1Ev // CHECK-NEXT: ret void return x; }() ; diff --git a/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_error.c b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_error.c index c4a4cf3d97526..020912a4965de 100644 --- a/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_error.c +++ b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_error.c @@ -1,16 +1,16 @@ -// Test CF+LF are properly handled along with quoted, multi-line #error -// RUN: %clang_cc1 -DOTHER -print-dependency-directives-minimized-source %s 2>&1 | FileCheck %s - -#ifndef TEST -#error "message \ - more message \ - even more" -#endif - -#ifdef OTHER -#include -#endif - -// CHECK: #ifdef OTHER -// CHECK-NEXT: #include -// CHECK-NEXT: #endif +// Test CF+LF are properly handled along with quoted, multi-line #error +// RUN: %clang_cc1 -DOTHER -print-dependency-directives-minimized-source %s 2>&1 | FileCheck %s + +#ifndef TEST +#error "message \ + more message \ + even more" +#endif + +#ifdef OTHER +#include +#endif + +// CHECK: #ifdef OTHER +// CHECK-NEXT: #include +// CHECK-NEXT: #endif diff --git a/clang/test/PCH/no-escaping-block-tail-calls.cpp b/clang/test/PCH/no-escaping-block-tail-calls.cpp index 5ae8108f387d0..bf197267d67d4 100644 --- a/clang/test/PCH/no-escaping-block-tail-calls.cpp +++ b/clang/test/PCH/no-escaping-block-tail-calls.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -x c++-header -triple x86_64-apple-darwin11 -emit-pch -O1 -fblocks -fno-escaping-block-tail-calls -o %t %S/no-escaping-block-tail-calls.h -// RUN: %clang_cc1 -triple 
x86_64-apple-darwin11 -include-pch %t -emit-llvm -O1 -fblocks -fno-escaping-block-tail-calls -o - %s | FileCheck %s +// RUN: %clang_cc1 -x c++-header -triple x86_64-apple-darwin11 -emit-pch -O2 -fblocks -fno-escaping-block-tail-calls -o %t %S/no-escaping-block-tail-calls.h +// RUN: %clang_cc1 -triple x86_64-apple-darwin11 -include-pch %t -emit-llvm -O2 -fblocks -fno-escaping-block-tail-calls -o - %s | FileCheck %s // Check that -fno-escaping-block-tail-calls doesn't disable tail-call // optimization if the block is non-escaping. diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/ambiguous_tail_call_seq1/Makefile b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/ambiguous_tail_call_seq1/Makefile index 48342e8e3afb9..666a6c3655460 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/ambiguous_tail_call_seq1/Makefile +++ b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/ambiguous_tail_call_seq1/Makefile @@ -1,4 +1,4 @@ CXX_SOURCES := main.cpp -CXXFLAGS_EXTRAS := -g -O1 -glldb +CXXFLAGS_EXTRAS := -g -O2 -glldb include Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/ambiguous_tail_call_seq2/Makefile b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/ambiguous_tail_call_seq2/Makefile index 48342e8e3afb9..666a6c3655460 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/ambiguous_tail_call_seq2/Makefile +++ b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/ambiguous_tail_call_seq2/Makefile @@ -1,4 +1,4 @@ CXX_SOURCES := main.cpp -CXXFLAGS_EXTRAS := -g -O1 -glldb +CXXFLAGS_EXTRAS := -g -O2 -glldb include Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/disambiguate_call_site/Makefile b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/disambiguate_call_site/Makefile index 
48342e8e3afb9..666a6c3655460 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/disambiguate_call_site/Makefile +++ b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/disambiguate_call_site/Makefile @@ -1,4 +1,4 @@ CXX_SOURCES := main.cpp -CXXFLAGS_EXTRAS := -g -O1 -glldb +CXXFLAGS_EXTRAS := -g -O2 -glldb include Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/disambiguate_paths_to_common_sink/Makefile b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/disambiguate_paths_to_common_sink/Makefile index 48342e8e3afb9..666a6c3655460 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/disambiguate_paths_to_common_sink/Makefile +++ b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/disambiguate_paths_to_common_sink/Makefile @@ -1,4 +1,4 @@ CXX_SOURCES := main.cpp -CXXFLAGS_EXTRAS := -g -O1 -glldb +CXXFLAGS_EXTRAS := -g -O2 -glldb include Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/disambiguate_tail_call_seq/Makefile b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/disambiguate_tail_call_seq/Makefile index 48342e8e3afb9..666a6c3655460 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/disambiguate_tail_call_seq/Makefile +++ b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/disambiguate_tail_call_seq/Makefile @@ -1,4 +1,4 @@ CXX_SOURCES := main.cpp -CXXFLAGS_EXTRAS := -g -O1 -glldb +CXXFLAGS_EXTRAS := -g -O2 -glldb include Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/inlining_and_tail_calls/Makefile b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/inlining_and_tail_calls/Makefile index 48342e8e3afb9..666a6c3655460 100644 --- 
a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/inlining_and_tail_calls/Makefile +++ b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/inlining_and_tail_calls/Makefile @@ -1,4 +1,4 @@ CXX_SOURCES := main.cpp -CXXFLAGS_EXTRAS := -g -O1 -glldb +CXXFLAGS_EXTRAS := -g -O2 -glldb include Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/sbapi_support/Makefile b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/sbapi_support/Makefile index 48342e8e3afb9..666a6c3655460 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/sbapi_support/Makefile +++ b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/sbapi_support/Makefile @@ -1,4 +1,4 @@ CXX_SOURCES := main.cpp -CXXFLAGS_EXTRAS := -g -O1 -glldb +CXXFLAGS_EXTRAS := -g -O2 -glldb include Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/thread_step_out_message/Makefile b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/thread_step_out_message/Makefile index 48342e8e3afb9..666a6c3655460 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/thread_step_out_message/Makefile +++ b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/thread_step_out_message/Makefile @@ -1,4 +1,4 @@ CXX_SOURCES := main.cpp -CXXFLAGS_EXTRAS := -g -O1 -glldb +CXXFLAGS_EXTRAS := -g -O2 -glldb include Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/thread_step_out_or_return/Makefile b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/thread_step_out_or_return/Makefile index 48342e8e3afb9..666a6c3655460 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/thread_step_out_or_return/Makefile +++ 
b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/thread_step_out_or_return/Makefile @@ -1,4 +1,4 @@ CXX_SOURCES := main.cpp -CXXFLAGS_EXTRAS := -g -O1 -glldb +CXXFLAGS_EXTRAS := -g -O2 -glldb include Makefile.rules diff --git a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/unambiguous_sequence/Makefile b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/unambiguous_sequence/Makefile index 48342e8e3afb9..666a6c3655460 100644 --- a/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/unambiguous_sequence/Makefile +++ b/lldb/packages/Python/lldbsuite/test/functionalities/tail_call_frames/unambiguous_sequence/Makefile @@ -1,4 +1,4 @@ CXX_SOURCES := main.cpp -CXXFLAGS_EXTRAS := -g -O1 -glldb +CXXFLAGS_EXTRAS := -g -O2 -glldb include Makefile.rules diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index f73e4b42dd4bf..7fe03f72305b1 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -151,10 +151,6 @@ class PassBuilder { /// Optimize quickly without destroying debuggability. /// - /// FIXME: The current and historical behavior of this level does *not* - /// agree with this goal, but we would like to move toward this goal in the - /// future. - /// /// This level is tuned to produce a result from the optimizer as quickly /// as possible and to avoid destroying debuggability. This tends to result /// in a very good development mode where the compiled code will be @@ -164,9 +160,9 @@ class PassBuilder { /// debugging of the resulting binary. /// /// As an example, complex loop transformations such as versioning, - /// vectorization, or fusion might not make sense here due to the degree to - /// which the executed code would differ from the source code, and the - /// potential compile time cost. 
+ /// vectorization, or fusion don't make sense here due to the degree to + /// which the executed code differs from the source code, and the compile time + /// cost. O1, /// Optimize for fast execution as much as possible without triggering diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 5896dbf5bb985..b22921b2b878c 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -400,21 +400,25 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); // Hoisting of scalars and load expressions. - if (EnableGVNHoist) - FPM.addPass(GVNHoistPass()); - - // Global value numbering based sinking. - if (EnableGVNSink) { - FPM.addPass(GVNSinkPass()); - FPM.addPass(SimplifyCFGPass()); + if (Level > O1) { + if (EnableGVNHoist) + FPM.addPass(GVNHoistPass()); + + // Global value numbering based sinking. + if (EnableGVNSink) { + FPM.addPass(GVNSinkPass()); + FPM.addPass(SimplifyCFGPass()); + } } // Speculative execution if the target has divergent branches; otherwise nop. - FPM.addPass(SpeculativeExecutionPass()); + if (Level > O1) { + FPM.addPass(SpeculativeExecutionPass()); - // Optimize based on known information about branches, and cleanup afterward. - FPM.addPass(JumpThreadingPass()); - FPM.addPass(CorrelatedValuePropagationPass()); + // Optimize based on known information about branches, and cleanup afterward. + FPM.addPass(JumpThreadingPass()); + FPM.addPass(CorrelatedValuePropagationPass()); + } FPM.addPass(SimplifyCFGPass()); if (Level == O3) FPM.addPass(AggressiveInstCombinePass()); @@ -428,10 +432,12 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // For PGO use pipeline, try to optimize memory intrinsics such as memcpy // using the size value profile. Don't perform this when optimizing for size. 
if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse && - !isOptimizingForSize(Level)) + !isOptimizingForSize(Level) && Level > O1) FPM.addPass(PGOMemOPSizeOpt()); - FPM.addPass(TailCallElimPass()); + // TODO: Investigate the cost/benefit of tail call elimination on debugging. + if (Level > O1) + FPM.addPass(TailCallElimPass()); FPM.addPass(SimplifyCFGPass()); // Form canonically associated expression trees, and simplify the trees using @@ -458,6 +464,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // Rotate Loop - disable header duplication at -Oz LPM1.addPass(LoopRotatePass(Level != Oz)); + // TODO: Investigate promotion cap for O1. LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); LPM1.addPass(SimpleLoopUnswitchPass()); LPM2.addPass(IndVarSimplifyPass()); @@ -525,18 +532,21 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // Re-consider control flow based optimizations after redundancy elimination, // redo DCE, etc. - FPM.addPass(JumpThreadingPass()); - FPM.addPass(CorrelatedValuePropagationPass()); - FPM.addPass(DSEPass()); - FPM.addPass(createFunctionToLoopPassAdaptor( - LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), - EnableMSSALoopDependency, DebugLogging)); + if (Level > O1) { + FPM.addPass(JumpThreadingPass()); + FPM.addPass(CorrelatedValuePropagationPass()); + FPM.addPass(DSEPass()); + FPM.addPass(createFunctionToLoopPassAdaptor( + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), + EnableMSSALoopDependency, DebugLogging)); + } for (auto &C : ScalarOptimizerLateEPCallbacks) C(FPM, Level); // Finally, do an expensive DCE pass to catch all the dead code exposed by // the simplifications and basic cleanup after all the simplifications. + // TODO: Investigate if this is too expensive. 
FPM.addPass(ADCEPass()); FPM.addPass(SimplifyCFGPass()); FPM.addPass(InstCombinePass()); diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 5314a8219b1ea..81424229c3bfe 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -320,19 +320,26 @@ void PassManagerBuilder::addFunctionSimplificationPasses( legacy::PassManagerBase &MPM) { // Start of function pass. // Break up aggregate allocas, using SSAUpdater. + assert(OptLevel >= 1 && "Calling function optimizer with no optimization level!"); MPM.add(createSROAPass()); MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies - if (EnableGVNHoist) - MPM.add(createGVNHoistPass()); - if (EnableGVNSink) { - MPM.add(createGVNSinkPass()); - MPM.add(createCFGSimplificationPass()); + + if (OptLevel > 1) { + if (EnableGVNHoist) + MPM.add(createGVNHoistPass()); + if (EnableGVNSink) { + MPM.add(createGVNSinkPass()); + MPM.add(createCFGSimplificationPass()); + } } - // Speculative execution if the target has divergent branches; otherwise nop. - MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); - MPM.add(createJumpThreadingPass()); // Thread jumps. - MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals + if (OptLevel > 1) { + // Speculative execution if the target has divergent branches; otherwise nop. + MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); + + MPM.add(createJumpThreadingPass()); // Thread jumps. 
+ MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals + } MPM.add(createCFGSimplificationPass()); // Merge & remove BBs // Combine silly seq's if (OptLevel > 2) @@ -346,8 +353,10 @@ void PassManagerBuilder::addFunctionSimplificationPasses( if (SizeLevel == 0) MPM.add(createPGOMemOPSizeOptLegacyPass()); - MPM.add(createTailCallEliminationPass()); // Eliminate tail calls - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + // TODO: Investigate the cost/benefit of tail call elimination on debugging. + if (OptLevel > 1) + MPM.add(createTailCallEliminationPass()); // Eliminate tail calls + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs MPM.add(createReassociatePass()); // Reassociate expressions // Begin the loop pass pipeline. @@ -360,6 +369,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses( } // Rotate Loop - disable header duplication at -Oz MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); + // TODO: Investigate promotion cap for O1. MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); if (EnableSimpleLoopUnswitch) MPM.add(createSimpleLoopUnswitchLegacyPass()); @@ -402,16 +412,19 @@ void PassManagerBuilder::addFunctionSimplificationPasses( // opened up by them. 
addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); - MPM.add(createJumpThreadingPass()); // Thread jumps - MPM.add(createCorrelatedValuePropagationPass()); - MPM.add(createDeadStoreEliminationPass()); // Delete dead stores - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + if (OptLevel > 1) { + MPM.add(createJumpThreadingPass()); // Thread jumps + MPM.add(createCorrelatedValuePropagationPass()); + MPM.add(createDeadStoreEliminationPass()); // Delete dead stores + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + } addExtensionsToPM(EP_ScalarOptimizerLate, MPM); if (RerollLoops) MPM.add(createLoopRerollPass()); + // TODO: Investigate if this is too expensive at O1. MPM.add(createAggressiveDCEPass()); // Delete dead instructions MPM.add(createCFGSimplificationPass()); // Merge & remove BBs // Clean up after everything. @@ -899,7 +912,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // LTO provides additional opportunities for tailcall elimination due to // link-time inlining, and visibility of nocapture attribute. - PM.add(createTailCallEliminationPass()); + if (OptLevel > 1) + PM.add(createTailCallEliminationPass()); // Infer attributes on declarations, call sites, arguments, etc. PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture. 
diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll index 859f848d228c4..682c0679fa240 100644 --- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll +++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll @@ -3,17 +3,17 @@ ; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-use-native -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NATIVE %s ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos -; GCN-POSTLINK: tail call fast float @_Z3sinf( -; GCN-POSTLINK: tail call fast float @_Z3cosf( +; GCN-POSTLINK: call fast float @_Z3sinf( +; GCN-POSTLINK: call fast float @_Z3cosf( ; GCN-PRELINK: call fast float @_Z6sincosfPf( -; GCN-NATIVE: tail call fast float @_Z10native_sinf( -; GCN-NATIVE: tail call fast float @_Z10native_cosf( +; GCN-NATIVE: call fast float @_Z10native_sinf( +; GCN-NATIVE: call fast float @_Z10native_cosf( define amdgpu_kernel void @test_sincos(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3sinf(float %tmp) + %call = call fast float @_Z3sinf(float %tmp) store float %call, float addrspace(1)* %a, align 4 - %call2 = tail call fast float @_Z3cosf(float %tmp) + %call2 = call fast float @_Z3cosf(float %tmp) %arrayidx3 = getelementptr inbounds float, float addrspace(1)* %a, i64 1 store float %call2, float addrspace(1)* %arrayidx3, align 4 ret void @@ -24,17 +24,17 @@ declare float @_Z3sinf(float) declare float @_Z3cosf(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v2 -; GCN-POSTLINK: tail call fast <2 x float> @_Z3sinDv2_f( -; GCN-POSTLINK: tail call fast <2 x float> @_Z3cosDv2_f( +; GCN-POSTLINK: call fast <2 x float> @_Z3sinDv2_f( +; GCN-POSTLINK: call fast <2 x float> @_Z3cosDv2_f( ; GCN-PRELINK: call fast <2 x float> @_Z6sincosDv2_fPS_( -; GCN-NATIVE: tail call fast <2 x float> @_Z10native_sinDv2_f( -; GCN-NATIVE: tail call fast <2 x float> @_Z10native_cosDv2_f( +; 
GCN-NATIVE: call fast <2 x float> @_Z10native_sinDv2_f( +; GCN-NATIVE: call fast <2 x float> @_Z10native_cosDv2_f( define amdgpu_kernel void @test_sincos_v2(<2 x float> addrspace(1)* nocapture %a) { entry: %tmp = load <2 x float>, <2 x float> addrspace(1)* %a, align 8 - %call = tail call fast <2 x float> @_Z3sinDv2_f(<2 x float> %tmp) + %call = call fast <2 x float> @_Z3sinDv2_f(<2 x float> %tmp) store <2 x float> %call, <2 x float> addrspace(1)* %a, align 8 - %call2 = tail call fast <2 x float> @_Z3cosDv2_f(<2 x float> %tmp) + %call2 = call fast <2 x float> @_Z3cosDv2_f(<2 x float> %tmp) %arrayidx3 = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %a, i64 1 store <2 x float> %call2, <2 x float> addrspace(1)* %arrayidx3, align 8 ret void @@ -45,20 +45,20 @@ declare <2 x float> @_Z3sinDv2_f(<2 x float>) declare <2 x float> @_Z3cosDv2_f(<2 x float>) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v3 -; GCN-POSTLINK: tail call fast <3 x float> @_Z3sinDv3_f( -; GCN-POSTLINK: tail call fast <3 x float> @_Z3cosDv3_f( +; GCN-POSTLINK: call fast <3 x float> @_Z3sinDv3_f( +; GCN-POSTLINK: call fast <3 x float> @_Z3cosDv3_f( ; GCN-PRELINK: call fast <3 x float> @_Z6sincosDv3_fPS_( -; GCN-NATIVE: tail call fast <3 x float> @_Z10native_sinDv3_f( -; GCN-NATIVE: tail call fast <3 x float> @_Z10native_cosDv3_f( +; GCN-NATIVE: call fast <3 x float> @_Z10native_sinDv3_f( +; GCN-NATIVE: call fast <3 x float> @_Z10native_cosDv3_f( define amdgpu_kernel void @test_sincos_v3(<3 x float> addrspace(1)* nocapture %a) { entry: %castToVec4 = bitcast <3 x float> addrspace(1)* %a to <4 x float> addrspace(1)* %loadVec4 = load <4 x float>, <4 x float> addrspace(1)* %castToVec4, align 16 %extractVec4 = shufflevector <4 x float> %loadVec4, <4 x float> undef, <3 x i32> - %call = tail call fast <3 x float> @_Z3sinDv3_f(<3 x float> %extractVec4) + %call = call fast <3 x float> @_Z3sinDv3_f(<3 x float> %extractVec4) %extractVec6 = shufflevector <3 x float> %call, <3 x float> 
undef, <4 x i32> store <4 x float> %extractVec6, <4 x float> addrspace(1)* %castToVec4, align 16 - %call11 = tail call fast <3 x float> @_Z3cosDv3_f(<3 x float> %extractVec4) + %call11 = call fast <3 x float> @_Z3cosDv3_f(<3 x float> %extractVec4) %arrayidx12 = getelementptr inbounds <3 x float>, <3 x float> addrspace(1)* %a, i64 1 %extractVec13 = shufflevector <3 x float> %call11, <3 x float> undef, <4 x i32> %storetmp14 = bitcast <3 x float> addrspace(1)* %arrayidx12 to <4 x float> addrspace(1)* @@ -71,17 +71,17 @@ declare <3 x float> @_Z3sinDv3_f(<3 x float>) declare <3 x float> @_Z3cosDv3_f(<3 x float>) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v4 -; GCN-POSTLINK: tail call fast <4 x float> @_Z3sinDv4_f( -; GCN-POSTLINK: tail call fast <4 x float> @_Z3cosDv4_f( +; GCN-POSTLINK: call fast <4 x float> @_Z3sinDv4_f( +; GCN-POSTLINK: call fast <4 x float> @_Z3cosDv4_f( ; GCN-PRELINK: call fast <4 x float> @_Z6sincosDv4_fPS_( -; GCN-NATIVE: tail call fast <4 x float> @_Z10native_sinDv4_f( -; GCN-NATIVE: tail call fast <4 x float> @_Z10native_cosDv4_f( +; GCN-NATIVE: call fast <4 x float> @_Z10native_sinDv4_f( +; GCN-NATIVE: call fast <4 x float> @_Z10native_cosDv4_f( define amdgpu_kernel void @test_sincos_v4(<4 x float> addrspace(1)* nocapture %a) { entry: %tmp = load <4 x float>, <4 x float> addrspace(1)* %a, align 16 - %call = tail call fast <4 x float> @_Z3sinDv4_f(<4 x float> %tmp) + %call = call fast <4 x float> @_Z3sinDv4_f(<4 x float> %tmp) store <4 x float> %call, <4 x float> addrspace(1)* %a, align 16 - %call2 = tail call fast <4 x float> @_Z3cosDv4_f(<4 x float> %tmp) + %call2 = call fast <4 x float> @_Z3cosDv4_f(<4 x float> %tmp) %arrayidx3 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %a, i64 1 store <4 x float> %call2, <4 x float> addrspace(1)* %arrayidx3, align 16 ret void @@ -92,17 +92,17 @@ declare <4 x float> @_Z3sinDv4_f(<4 x float>) declare <4 x float> @_Z3cosDv4_f(<4 x float>) ; GCN-LABEL: {{^}}define 
amdgpu_kernel void @test_sincos_v8 -; GCN-POSTLINK: tail call fast <8 x float> @_Z3sinDv8_f( -; GCN-POSTLINK: tail call fast <8 x float> @_Z3cosDv8_f( +; GCN-POSTLINK: call fast <8 x float> @_Z3sinDv8_f( +; GCN-POSTLINK: call fast <8 x float> @_Z3cosDv8_f( ; GCN-PRELINK: call fast <8 x float> @_Z6sincosDv8_fPS_( -; GCN-NATIVE: tail call fast <8 x float> @_Z10native_sinDv8_f( -; GCN-NATIVE: tail call fast <8 x float> @_Z10native_cosDv8_f( +; GCN-NATIVE: call fast <8 x float> @_Z10native_sinDv8_f( +; GCN-NATIVE: call fast <8 x float> @_Z10native_cosDv8_f( define amdgpu_kernel void @test_sincos_v8(<8 x float> addrspace(1)* nocapture %a) { entry: %tmp = load <8 x float>, <8 x float> addrspace(1)* %a, align 32 - %call = tail call fast <8 x float> @_Z3sinDv8_f(<8 x float> %tmp) + %call = call fast <8 x float> @_Z3sinDv8_f(<8 x float> %tmp) store <8 x float> %call, <8 x float> addrspace(1)* %a, align 32 - %call2 = tail call fast <8 x float> @_Z3cosDv8_f(<8 x float> %tmp) + %call2 = call fast <8 x float> @_Z3cosDv8_f(<8 x float> %tmp) %arrayidx3 = getelementptr inbounds <8 x float>, <8 x float> addrspace(1)* %a, i64 1 store <8 x float> %call2, <8 x float> addrspace(1)* %arrayidx3, align 32 ret void @@ -113,17 +113,17 @@ declare <8 x float> @_Z3sinDv8_f(<8 x float>) declare <8 x float> @_Z3cosDv8_f(<8 x float>) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v16 -; GCN-POSTLINK: tail call fast <16 x float> @_Z3sinDv16_f( -; GCN-POSTLINK: tail call fast <16 x float> @_Z3cosDv16_f( +; GCN-POSTLINK: call fast <16 x float> @_Z3sinDv16_f( +; GCN-POSTLINK: call fast <16 x float> @_Z3cosDv16_f( ; GCN-PRELINK: call fast <16 x float> @_Z6sincosDv16_fPS_( -; GCN-NATIVE: tail call fast <16 x float> @_Z10native_sinDv16_f( -; GCN-NATIVE: tail call fast <16 x float> @_Z10native_cosDv16_f( +; GCN-NATIVE: call fast <16 x float> @_Z10native_sinDv16_f( +; GCN-NATIVE: call fast <16 x float> @_Z10native_cosDv16_f( define amdgpu_kernel void @test_sincos_v16(<16 x float> addrspace(1)* 
nocapture %a) { entry: %tmp = load <16 x float>, <16 x float> addrspace(1)* %a, align 64 - %call = tail call fast <16 x float> @_Z3sinDv16_f(<16 x float> %tmp) + %call = call fast <16 x float> @_Z3sinDv16_f(<16 x float> %tmp) store <16 x float> %call, <16 x float> addrspace(1)* %a, align 64 - %call2 = tail call fast <16 x float> @_Z3cosDv16_f(<16 x float> %tmp) + %call2 = call fast <16 x float> @_Z3cosDv16_f(<16 x float> %tmp) %arrayidx3 = getelementptr inbounds <16 x float>, <16 x float> addrspace(1)* %a, i64 1 store <16 x float> %call2, <16 x float> addrspace(1)* %arrayidx3, align 64 ret void @@ -137,7 +137,7 @@ declare <16 x float> @_Z3cosDv16_f(<16 x float>) ; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a define amdgpu_kernel void @test_native_recip(float addrspace(1)* nocapture %a) { entry: - %call = tail call fast float @_Z12native_recipf(float 3.000000e+00) + %call = call fast float @_Z12native_recipf(float 3.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -148,7 +148,7 @@ declare float @_Z12native_recipf(float) ; GCN: store float 0x3FD5555560000000, float addrspace(1)* %a define amdgpu_kernel void @test_half_recip(float addrspace(1)* nocapture %a) { entry: - %call = tail call fast float @_Z10half_recipf(float 3.000000e+00) + %call = call fast float @_Z10half_recipf(float 3.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -160,7 +160,7 @@ declare float @_Z10half_recipf(float) define amdgpu_kernel void @test_native_divide(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z13native_divideff(float %tmp, float 3.000000e+00) + %call = call fast float @_Z13native_divideff(float %tmp, float 3.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -172,7 +172,7 @@ declare float @_Z13native_divideff(float, float) define amdgpu_kernel void @test_half_divide(float addrspace(1)* nocapture %a) { entry: 
%tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z11half_divideff(float %tmp, float 3.000000e+00) + %call = call fast float @_Z11half_divideff(float %tmp, float 3.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -184,7 +184,7 @@ declare float @_Z11half_divideff(float, float) define amdgpu_kernel void @test_pow_0f(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3powff(float %tmp, float 0.000000e+00) + %call = call fast float @_Z3powff(float %tmp, float 0.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -196,7 +196,7 @@ declare float @_Z3powff(float, float) define amdgpu_kernel void @test_pow_0i(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3powff(float %tmp, float 0.000000e+00) + %call = call fast float @_Z3powff(float %tmp, float 0.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -208,7 +208,7 @@ define amdgpu_kernel void @test_pow_1f(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = tail call fast float @_Z3powff(float %tmp, float 1.000000e+00) + %call = call fast float @_Z3powff(float %tmp, float 1.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -220,7 +220,7 @@ define amdgpu_kernel void @test_pow_1i(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = tail call fast float @_Z3powff(float %tmp, float 1.000000e+00) + %call = call fast float @_Z3powff(float %tmp, float 1.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -231,7 +231,7 @@ entry: define amdgpu_kernel void 
@test_pow_2f(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3powff(float %tmp, float 2.000000e+00) + %call = call fast float @_Z3powff(float %tmp, float 2.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -242,7 +242,7 @@ entry: define amdgpu_kernel void @test_pow_2i(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3powff(float %tmp, float 2.000000e+00) + %call = call fast float @_Z3powff(float %tmp, float 2.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -254,7 +254,7 @@ define amdgpu_kernel void @test_pow_m1f(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = tail call fast float @_Z3powff(float %tmp, float -1.000000e+00) + %call = call fast float @_Z3powff(float %tmp, float -1.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -266,31 +266,31 @@ define amdgpu_kernel void @test_pow_m1i(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = tail call fast float @_Z3powff(float %tmp, float -1.000000e+00) + %call = call fast float @_Z3powff(float %tmp, float -1.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_half -; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float 5.000000e-01) -; GCN-PRELINK: %__pow2sqrt = tail call fast float @_Z4sqrtf(float %tmp) +; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float 5.000000e-01) +; GCN-PRELINK: %__pow2sqrt = call fast float @_Z4sqrtf(float %tmp) define amdgpu_kernel void @test_pow_half(float addrspace(1)* nocapture %a) { entry: %arrayidx 
= getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = tail call fast float @_Z3powff(float %tmp, float 5.000000e-01) + %call = call fast float @_Z3powff(float %tmp, float 5.000000e-01) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_mhalf -; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float -5.000000e-01) -; GCN-PRELINK: %__pow2rsqrt = tail call fast float @_Z5rsqrtf(float %tmp) +; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float -5.000000e-01) +; GCN-PRELINK: %__pow2rsqrt = call fast float @_Z5rsqrtf(float %tmp) define amdgpu_kernel void @test_pow_mhalf(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = tail call fast float @_Z3powff(float %tmp, float -5.000000e-01) + %call = call fast float @_Z3powff(float %tmp, float -5.000000e-01) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -305,7 +305,7 @@ define amdgpu_kernel void @test_pow_c(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = tail call fast float @_Z3powff(float %tmp, float 1.100000e+01) + %call = call fast float @_Z3powff(float %tmp, float 1.100000e+01) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -320,7 +320,7 @@ define amdgpu_kernel void @test_powr_c(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = tail call fast float @_Z4powrff(float %tmp, float 1.100000e+01) + %call = call fast float @_Z4powrff(float %tmp, float 1.100000e+01) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -337,7 +337,7 @@ 
define amdgpu_kernel void @test_pown_c(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = tail call fast float @_Z4pownfi(float %tmp, i32 11) + %call = call fast float @_Z4pownfi(float %tmp, i32 11) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -345,11 +345,11 @@ entry: declare float @_Z4pownfi(float, i32) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow -; GCN-POSTLINK: tail call fast float @_Z3powff(float %tmp, float 1.013000e+03) -; GCN-PRELINK: %__fabs = tail call fast float @_Z4fabsf(float %tmp) -; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs) +; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float 1.013000e+03) +; GCN-PRELINK: %__fabs = call fast float @_Z4fabsf(float %tmp) +; GCN-PRELINK: %__log2 = call fast float @_Z4log2f(float %__fabs) ; GCN-PRELINK: %__ylogx = fmul fast float %__log2, 1.013000e+03 -; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx) +; GCN-PRELINK: %__exp2 = call fast float @_Z4exp2f(float %__ylogx) ; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32 ; GCN-PRELINK: %__pow_sign = and i32 %[[r0]], -2147483648 ; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32 @@ -359,39 +359,39 @@ declare float @_Z4pownfi(float, i32) define amdgpu_kernel void @test_pow(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3powff(float %tmp, float 1.013000e+03) + %call = call fast float @_Z3powff(float %tmp, float 1.013000e+03) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr -; GCN-POSTLINK: tail call fast float @_Z4powrff(float %tmp, float %tmp1) -; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %tmp) +; GCN-POSTLINK: call fast float @_Z4powrff(float %tmp, float %tmp1) +; GCN-PRELINK: %__log2 = 
call fast float @_Z4log2f(float %tmp) ; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %tmp1 -; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx) +; GCN-PRELINK: %__exp2 = call fast float @_Z4exp2f(float %__ylogx) ; GCN-PRELINK: store float %__exp2, float addrspace(1)* %a, align 4 -; GCN-NATIVE: %__log2 = tail call fast float @_Z11native_log2f(float %tmp) +; GCN-NATIVE: %__log2 = call fast float @_Z11native_log2f(float %tmp) ; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1 -; GCN-NATIVE: %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx) +; GCN-NATIVE: %__exp2 = call fast float @_Z11native_exp2f(float %__ylogx) ; GCN-NATIVE: store float %__exp2, float addrspace(1)* %a, align 4 define amdgpu_kernel void @test_powr(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4 - %call = tail call fast float @_Z4powrff(float %tmp, float %tmp1) + %call = call fast float @_Z4powrff(float %tmp, float %tmp1) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown -; GCN-POSTLINK: tail call fast float @_Z4pownfi(float %tmp, i32 %conv) +; GCN-POSTLINK: call fast float @_Z4pownfi(float %tmp, i32 %conv) ; GCN-PRELINK: %conv = fptosi float %tmp1 to i32 -; GCN-PRELINK: %__fabs = tail call fast float @_Z4fabsf(float %tmp) -; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs) +; GCN-PRELINK: %__fabs = call fast float @_Z4fabsf(float %tmp) +; GCN-PRELINK: %__log2 = call fast float @_Z4log2f(float %__fabs) ; GCN-PRELINK: %pownI2F = sitofp i32 %conv to float ; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %pownI2F -; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx) +; GCN-PRELINK: %__exp2 = call fast float @_Z4exp2f(float %__ylogx) ; GCN-PRELINK: %__yeven = shl i32 %conv, 31 ; 
GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32 ; GCN-PRELINK: %__pow_sign = and i32 %__yeven, %[[r0]] @@ -405,7 +405,7 @@ entry: %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4 %conv = fptosi float %tmp1 to i32 - %call = tail call fast float @_Z4pownfi(float %tmp, i32 %conv) + %call = call fast float @_Z4pownfi(float %tmp, i32 %conv) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -417,7 +417,7 @@ define amdgpu_kernel void @test_rootn_1(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 - %call = tail call fast float @_Z5rootnfi(float %tmp, i32 1) + %call = call fast float @_Z5rootnfi(float %tmp, i32 1) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -425,23 +425,23 @@ entry: declare float @_Z5rootnfi(float, i32) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_2 -; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 2) -; GCN-PRELINK: %__rootn2sqrt = tail call fast float @_Z4sqrtf(float %tmp) +; GCN-POSTLINK: call fast float @_Z5rootnfi(float %tmp, i32 2) +; GCN-PRELINK: %__rootn2sqrt = call fast float @_Z4sqrtf(float %tmp) define amdgpu_kernel void @test_rootn_2(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z5rootnfi(float %tmp, i32 2) + %call = call fast float @_Z5rootnfi(float %tmp, i32 2) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_3 -; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 3) -; GCN-PRELINK: %__rootn2cbrt = tail call fast float @_Z4cbrtf(float %tmp) +; GCN-POSTLINK: call fast float @_Z5rootnfi(float %tmp, i32 3) +; GCN-PRELINK: %__rootn2cbrt = call fast float @_Z4cbrtf(float %tmp) define amdgpu_kernel void 
@test_rootn_3(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z5rootnfi(float %tmp, i32 3) + %call = call fast float @_Z5rootnfi(float %tmp, i32 3) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -451,18 +451,18 @@ entry: define amdgpu_kernel void @test_rootn_m1(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z5rootnfi(float %tmp, i32 -1) + %call = call fast float @_Z5rootnfi(float %tmp, i32 -1) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m2 -; GCN-POSTLINK: tail call fast float @_Z5rootnfi(float %tmp, i32 -2) -; GCN-PRELINK: %__rootn2rsqrt = tail call fast float @_Z5rsqrtf(float %tmp) +; GCN-POSTLINK: call fast float @_Z5rootnfi(float %tmp, i32 -2) +; GCN-PRELINK: %__rootn2rsqrt = call fast float @_Z5rsqrtf(float %tmp) define amdgpu_kernel void @test_rootn_m2(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z5rootnfi(float %tmp, i32 -2) + %call = call fast float @_Z5rootnfi(float %tmp, i32 -2) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -472,7 +472,7 @@ entry: define amdgpu_kernel void @test_fma_0x(float addrspace(1)* nocapture %a, float %y) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3fmafff(float 0.000000e+00, float %tmp, float %y) + %call = call fast float @_Z3fmafff(float 0.000000e+00, float %tmp, float %y) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -484,7 +484,7 @@ declare float @_Z3fmafff(float, float, float) define amdgpu_kernel void @test_fma_x0(float addrspace(1)* nocapture %a, float %y) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3fmafff(float %tmp, float 0.000000e+00, float %y) + %call = call 
fast float @_Z3fmafff(float %tmp, float 0.000000e+00, float %y) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -494,7 +494,7 @@ entry: define amdgpu_kernel void @test_mad_0x(float addrspace(1)* nocapture %a, float %y) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3madfff(float 0.000000e+00, float %tmp, float %y) + %call = call fast float @_Z3madfff(float 0.000000e+00, float %tmp, float %y) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -506,7 +506,7 @@ declare float @_Z3madfff(float, float, float) define amdgpu_kernel void @test_mad_x0(float addrspace(1)* nocapture %a, float %y) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3madfff(float %tmp, float 0.000000e+00, float %y) + %call = call fast float @_Z3madfff(float %tmp, float 0.000000e+00, float %y) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -516,7 +516,7 @@ entry: define amdgpu_kernel void @test_fma_x1y(float addrspace(1)* nocapture %a, float %y) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3fmafff(float %tmp, float 1.000000e+00, float %y) + %call = call fast float @_Z3fmafff(float %tmp, float 1.000000e+00, float %y) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -526,7 +526,7 @@ entry: define amdgpu_kernel void @test_fma_1xy(float addrspace(1)* nocapture %a, float %y) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3fmafff(float 1.000000e+00, float %tmp, float %y) + %call = call fast float @_Z3fmafff(float 1.000000e+00, float %tmp, float %y) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -538,17 +538,17 @@ entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp = load float, float addrspace(1)* %arrayidx, align 4 %tmp1 = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float 
@_Z3fmafff(float %tmp, float %tmp1, float 0.000000e+00) + %call = call fast float @_Z3fmafff(float %tmp, float %tmp1, float 0.000000e+00) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp -; GCN-NATIVE: tail call fast float @_Z10native_expf(float %tmp) +; GCN-NATIVE: call fast float @_Z10native_expf(float %tmp) define amdgpu_kernel void @test_use_native_exp(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3expf(float %tmp) + %call = call fast float @_Z3expf(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -556,11 +556,11 @@ entry: declare float @_Z3expf(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp2 -; GCN-NATIVE: tail call fast float @_Z11native_exp2f(float %tmp) +; GCN-NATIVE: call fast float @_Z11native_exp2f(float %tmp) define amdgpu_kernel void @test_use_native_exp2(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z4exp2f(float %tmp) + %call = call fast float @_Z4exp2f(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -568,11 +568,11 @@ entry: declare float @_Z4exp2f(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp10 -; GCN-NATIVE: tail call fast float @_Z12native_exp10f(float %tmp) +; GCN-NATIVE: call fast float @_Z12native_exp10f(float %tmp) define amdgpu_kernel void @test_use_native_exp10(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z5exp10f(float %tmp) + %call = call fast float @_Z5exp10f(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -580,11 +580,11 @@ entry: declare float @_Z5exp10f(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log -; GCN-NATIVE: tail call fast float @_Z10native_logf(float 
%tmp) +; GCN-NATIVE: call fast float @_Z10native_logf(float %tmp) define amdgpu_kernel void @test_use_native_log(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3logf(float %tmp) + %call = call fast float @_Z3logf(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -592,11 +592,11 @@ entry: declare float @_Z3logf(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log2 -; GCN-NATIVE: tail call fast float @_Z11native_log2f(float %tmp) +; GCN-NATIVE: call fast float @_Z11native_log2f(float %tmp) define amdgpu_kernel void @test_use_native_log2(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z4log2f(float %tmp) + %call = call fast float @_Z4log2f(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -604,11 +604,11 @@ entry: declare float @_Z4log2f(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log10 -; GCN-NATIVE: tail call fast float @_Z12native_log10f(float %tmp) +; GCN-NATIVE: call fast float @_Z12native_log10f(float %tmp) define amdgpu_kernel void @test_use_native_log10(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z5log10f(float %tmp) + %call = call fast float @_Z5log10f(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -617,36 +617,36 @@ declare float @_Z5log10f(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_powr ; GCN-NATIVE: %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4 -; GCN-NATIVE: %__log2 = tail call fast float @_Z11native_log2f(float %tmp) +; GCN-NATIVE: %__log2 = call fast float @_Z11native_log2f(float %tmp) ; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1 -; GCN-NATIVE: %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx) +; GCN-NATIVE: %__exp2 = call 
fast float @_Z11native_exp2f(float %__ylogx) ; GCN-NATIVE: store float %__exp2, float addrspace(1)* %a, align 4 define amdgpu_kernel void @test_use_native_powr(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp1 = load float, float addrspace(1)* %arrayidx1, align 4 - %call = tail call fast float @_Z4powrff(float %tmp, float %tmp1) + %call = call fast float @_Z4powrff(float %tmp, float %tmp1) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sqrt -; GCN-NATIVE: tail call fast float @_Z11native_sqrtf(float %tmp) +; GCN-NATIVE: call fast float @_Z11native_sqrtf(float %tmp) define amdgpu_kernel void @test_use_native_sqrt(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z4sqrtf(float %tmp) + %call = call fast float @_Z4sqrtf(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64 -; GCN: tail call fast double @_Z4sqrtd(double %tmp) +; GCN: call fast double @_Z4sqrtd(double %tmp) define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64(double addrspace(1)* nocapture %a) { entry: %tmp = load double, double addrspace(1)* %a, align 8 - %call = tail call fast double @_Z4sqrtd(double %tmp) + %call = call fast double @_Z4sqrtd(double %tmp) store double %call, double addrspace(1)* %a, align 8 ret void } @@ -655,11 +655,11 @@ declare float @_Z4sqrtf(float) declare double @_Z4sqrtd(double) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_rsqrt -; GCN-NATIVE: tail call fast float @_Z12native_rsqrtf(float %tmp) +; GCN-NATIVE: call fast float @_Z12native_rsqrtf(float %tmp) define amdgpu_kernel void @test_use_native_rsqrt(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* 
%a, align 4 - %call = tail call fast float @_Z5rsqrtf(float %tmp) + %call = call fast float @_Z5rsqrtf(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -667,11 +667,11 @@ entry: declare float @_Z5rsqrtf(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_tan -; GCN-NATIVE: tail call fast float @_Z10native_tanf(float %tmp) +; GCN-NATIVE: call fast float @_Z10native_tanf(float %tmp) define amdgpu_kernel void @test_use_native_tan(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 - %call = tail call fast float @_Z3tanf(float %tmp) + %call = call fast float @_Z3tanf(float %tmp) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -679,14 +679,14 @@ entry: declare float @_Z3tanf(float) ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sincos -; GCN-NATIVE: tail call float @_Z10native_sinf(float %tmp) -; GCN-NATIVE: tail call float @_Z10native_cosf(float %tmp) +; GCN-NATIVE: call float @_Z10native_sinf(float %tmp) +; GCN-NATIVE: call float @_Z10native_cosf(float %tmp) define amdgpu_kernel void @test_use_native_sincos(float addrspace(1)* %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1 %tmp1 = addrspacecast float addrspace(1)* %arrayidx1 to float* - %call = tail call fast float @_Z6sincosfPf(float %tmp, float* %tmp1) + %call = call fast float @_Z6sincosfPf(float %tmp, float* %tmp1) store float %call, float addrspace(1)* %a, align 4 ret void } @@ -703,10 +703,10 @@ define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 a entry: %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)* %tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8* - %tmp2 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0 - %tmp3 = tail call %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, 
i32 4) - %tmp4 = tail call i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0 - tail call void @__commit_read_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4) + %tmp2 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0 + %tmp3 = call %opencl.reserve_id_t addrspace(5)* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) + %tmp4 = call i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0 + call void @__commit_read_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4) ret void } @@ -725,10 +725,10 @@ define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 entry: %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)* %tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8* - %tmp2 = tail call i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0 - %tmp3 = tail call %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) #0 - %tmp4 = tail call i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0 - tail call void @__commit_write_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 4) #0 + %tmp2 = call i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8* %tmp1, i32 4, i32 4) #0 + %tmp3 = call %opencl.reserve_id_t addrspace(5)* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) #0 + %tmp4 = call i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 2, i8* %tmp1, i32 4, i32 4) #0 + call void @__commit_write_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t addrspace(5)* %tmp3, i32 4, i32 
4) #0 ret void } @@ -755,31 +755,31 @@ declare void @__commit_write_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_ define amdgpu_kernel void @test_pipe_size(%opencl.pipe_t addrspace(1)* %p1, i8 addrspace(1)* %ptr1, %opencl.pipe_t addrspace(1)* %p2, i16 addrspace(1)* %ptr2, %opencl.pipe_t addrspace(1)* %p4, i32 addrspace(1)* %ptr4, %opencl.pipe_t addrspace(1)* %p8, i64 addrspace(1)* %ptr8, %opencl.pipe_t addrspace(1)* %p16, <2 x i64> addrspace(1)* %ptr16, %opencl.pipe_t addrspace(1)* %p32, <4 x i64> addrspace(1)* %ptr32, %opencl.pipe_t addrspace(1)* %p64, <8 x i64> addrspace(1)* %ptr64, %opencl.pipe_t addrspace(1)* %p128, <16 x i64> addrspace(1)* %ptr128, %opencl.pipe_t addrspace(1)* %pu, %struct.S addrspace(1)* %ptru) local_unnamed_addr #0 { entry: %tmp = addrspacecast i8 addrspace(1)* %ptr1 to i8* - %tmp1 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p1, i8* %tmp, i32 1, i32 1) #0 + %tmp1 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p1, i8* %tmp, i32 1, i32 1) #0 %tmp2 = bitcast i16 addrspace(1)* %ptr2 to i8 addrspace(1)* %tmp3 = addrspacecast i8 addrspace(1)* %tmp2 to i8* - %tmp4 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p2, i8* %tmp3, i32 2, i32 2) #0 + %tmp4 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p2, i8* %tmp3, i32 2, i32 2) #0 %tmp5 = bitcast i32 addrspace(1)* %ptr4 to i8 addrspace(1)* %tmp6 = addrspacecast i8 addrspace(1)* %tmp5 to i8* - %tmp7 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p4, i8* %tmp6, i32 4, i32 4) #0 + %tmp7 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p4, i8* %tmp6, i32 4, i32 4) #0 %tmp8 = bitcast i64 addrspace(1)* %ptr8 to i8 addrspace(1)* %tmp9 = addrspacecast i8 addrspace(1)* %tmp8 to i8* - %tmp10 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p8, i8* %tmp9, i32 8, i32 8) #0 + %tmp10 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p8, i8* %tmp9, i32 8, i32 8) #0 %tmp11 = bitcast <2 x i64> addrspace(1)* %ptr16 to i8 
addrspace(1)* %tmp12 = addrspacecast i8 addrspace(1)* %tmp11 to i8* - %tmp13 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p16, i8* %tmp12, i32 16, i32 16) #0 + %tmp13 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p16, i8* %tmp12, i32 16, i32 16) #0 %tmp14 = bitcast <4 x i64> addrspace(1)* %ptr32 to i8 addrspace(1)* %tmp15 = addrspacecast i8 addrspace(1)* %tmp14 to i8* - %tmp16 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p32, i8* %tmp15, i32 32, i32 32) #0 + %tmp16 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p32, i8* %tmp15, i32 32, i32 32) #0 %tmp17 = bitcast <8 x i64> addrspace(1)* %ptr64 to i8 addrspace(1)* %tmp18 = addrspacecast i8 addrspace(1)* %tmp17 to i8* - %tmp19 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p64, i8* %tmp18, i32 64, i32 64) #0 + %tmp19 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p64, i8* %tmp18, i32 64, i32 64) #0 %tmp20 = bitcast <16 x i64> addrspace(1)* %ptr128 to i8 addrspace(1)* %tmp21 = addrspacecast i8 addrspace(1)* %tmp20 to i8* - %tmp22 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p128, i8* %tmp21, i32 128, i32 128) #0 + %tmp22 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p128, i8* %tmp21, i32 128, i32 128) #0 %tmp23 = bitcast %struct.S addrspace(1)* %ptru to i8 addrspace(1)* %tmp24 = addrspacecast i8 addrspace(1)* %tmp23 to i8* - %tmp25 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %pu, i8* %tmp24, i32 400, i32 4) #0 + %tmp25 = call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %pu, i8* %tmp24, i32 400, i32 4) #0 ret void } diff --git a/llvm/test/Feature/optnone-opt.ll b/llvm/test/Feature/optnone-opt.ll index ae0e1a48acc58..f706ade7934f4 100644 --- a/llvm/test/Feature/optnone-opt.ll +++ b/llvm/test/Feature/optnone-opt.ll @@ -39,16 +39,10 @@ attributes #0 = { optnone noinline } ; IR passes run at -O1 and higher. 
; OPT-O1-DAG: Skipping pass 'Aggressive Dead Code Elimination' ; OPT-O1-DAG: Skipping pass 'Combine redundant instructions' -; OPT-O1-DAG: Skipping pass 'Dead Store Elimination' ; OPT-O1-DAG: Skipping pass 'Early CSE' -; OPT-O1-DAG: Skipping pass 'Jump Threading' -; OPT-O1-DAG: Skipping pass 'MemCpy Optimization' ; OPT-O1-DAG: Skipping pass 'Reassociate expressions' ; OPT-O1-DAG: Skipping pass 'Simplify the CFG' ; OPT-O1-DAG: Skipping pass 'Sparse Conditional Constant Propagation' -; OPT-O1-DAG: Skipping pass 'SROA' -; OPT-O1-DAG: Skipping pass 'Tail Call Elimination' -; OPT-O1-DAG: Skipping pass 'Value Propagation' ; Additional IR passes run at -O2 and higher. ; OPT-O2O3-DAG: Skipping pass 'Global Value Numbering' diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll index 009f19e544c85..e79a359277f6f 100644 --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -12,66 +12,70 @@ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O1 ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2 +; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2 \ +; RUN: --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 +; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ +; RUN: --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-Os +; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-Os \ +; RUN: --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='default' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-Oz +; RUN: | FileCheck %s 
--check-prefix=CHECK-O --check-prefix=CHECK-Oz \ +; RUN: --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='lto-pre-link' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O2 \ -; RUN: --check-prefix=CHECK-O2-LTO +; RUN: --check-prefix=CHECK-O2-LTO --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-peephole='no-op-function' \ ; RUN: -passes='default' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-PEEPHOLE +; RUN: --check-prefix=CHECK-EP-PEEPHOLE --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-late-loop-optimizations='no-op-loop' \ ; RUN: -passes='default' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-LOOP-LATE +; RUN: --check-prefix=CHECK-EP-LOOP-LATE --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-loop-optimizer-end='no-op-loop' \ ; RUN: -passes='default' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-LOOP-END +; RUN: --check-prefix=CHECK-EP-LOOP-END --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-scalar-optimizer-late='no-op-function' \ ; RUN: -passes='default' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-SCALAR-LATE +; RUN: --check-prefix=CHECK-EP-SCALAR-LATE --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-cgscc-optimizer-late='no-op-cgscc' \ ; RUN: -passes='default' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-CGSCC-LATE +; RUN: --check-prefix=CHECK-EP-CGSCC-LATE --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify 
-debug-pass-manager \ ; RUN: -passes-ep-vectorizer-start='no-op-function' \ ; RUN: -passes='default' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-VECTORIZER-START +; RUN: --check-prefix=CHECK-EP-VECTORIZER-START --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-pipeline-start='no-op-module' \ ; RUN: -passes='default' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-PIPELINE-START +; RUN: --check-prefix=CHECK-EP-PIPELINE-START --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-pipeline-start='no-op-module' \ ; RUN: -passes='lto-pre-link' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-PIPELINE-START +; RUN: --check-prefix=CHECK-EP-PIPELINE-START --check-prefix=CHECK-O23SZ ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes-ep-optimizer-last='no-op-function' \ ; RUN: -passes='default' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O --check-prefix=CHECK-O3 \ -; RUN: --check-prefix=CHECK-EP-OPTIMIZER-LAST +; RUN: --check-prefix=CHECK-EP-OPTIMIZER-LAST --check-prefix=CHECK-O23SZ ; CHECK-O: Running analysis: PassInstrumentationAnalysis ; CHECK-O-NEXT: Starting llvm::Module pass manager run. 
@@ -132,10 +136,10 @@ ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis -; CHECK-O-NEXT: Running pass: SpeculativeExecutionPass -; CHECK-O-NEXT: Running pass: JumpThreadingPass -; CHECK-O-NEXT: Running analysis: LazyValueAnalysis -; CHECK-O-NEXT: Running pass: CorrelatedValuePropagationPass +; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass +; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass +; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis +; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O3-NEXT: AggressiveInstCombinePass ; CHECK-O-NEXT: Running pass: InstCombinePass @@ -143,7 +147,7 @@ ; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass -; CHECK-O-NEXT: Running pass: TailCallElimPass +; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ReassociatePass ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis @@ -180,22 +184,10 @@ ; CHECK-EP-LOOP-END-NEXT: Running pass: NoOpLoopPass ; CHECK-O-NEXT: Finished Loop pass manager run. 
; CHECK-O-NEXT: Running pass: SROA on foo -; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass -; CHECK-Os-NEXT: Running pass: GVN -; CHECK-Os-NEXT: Running analysis: MemoryDependenceAnalysis -; CHECK-Os-NEXT: Running analysis: PhiValuesAnalysis -; CHECK-Oz-NEXT: Running pass: MergedLoadStoreMotionPass -; CHECK-Oz-NEXT: Running pass: GVN -; CHECK-Oz-NEXT: Running analysis: MemoryDependenceAnalysis -; CHECK-Oz-NEXT: Running analysis: PhiValuesAnalysis -; CHECK-O2-NEXT: Running pass: MergedLoadStoreMotionPass -; CHECK-O2-NEXT: Running pass: GVN -; CHECK-O2-NEXT: Running analysis: MemoryDependenceAnalysis -; CHECK-O2-NEXT: Running analysis: PhiValuesAnalysis -; CHECK-O3-NEXT: Running pass: MergedLoadStoreMotionPass -; CHECK-O3-NEXT: Running pass: GVN -; CHECK-O3-NEXT: Running analysis: MemoryDependenceAnalysis -; CHECK-O3-NEXT: Running analysis: PhiValuesAnalysis +; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass +; CHECK-O23SZ-NEXT: Running pass: GVN +; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis +; CHECK-O23SZ-NEXT: Running analysis: PhiValuesAnalysis ; CHECK-O-NEXT: Running pass: MemCpyOptPass ; CHECK-O1-NEXT: Running analysis: MemoryDependenceAnalysis ; CHECK-O1-NEXT: Running analysis: PhiValuesAnalysis @@ -204,14 +196,14 @@ ; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass -; CHECK-O-NEXT: Running pass: JumpThreadingPass -; CHECK-O-NEXT: Running pass: CorrelatedValuePropagationPass -; CHECK-O-NEXT: Running pass: DSEPass -; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass{{.*}}> -; CHECK-O-NEXT: Starting llvm::Function pass manager run. -; CHECK-O-NEXT: Running pass: LoopSimplifyPass -; CHECK-O-NEXT: Running pass: LCSSAPass -; CHECK-O-NEXT: Finished llvm::Function pass manager run. 
+; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass +; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass +; CHECK-O23SZ-NEXT: Running pass: DSEPass +; CHECK-O23SZ-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass{{.*}}> +; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run. +; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass +; CHECK-O23SZ-NEXT: Running pass: LCSSAPass +; CHECK-O23SZ-NEXT: Finished llvm::Function pass manager run. ; CHECK-EP-SCALAR-LATE-NEXT: Running pass: NoOpFunctionPass ; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll index a0b4df044450f..c93b360009b25 100644 --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -13,19 +13,19 @@ ; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-O1 ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='thinlto-pre-link,name-anon-globals' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-O2 +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-O2 ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='thinlto-pre-link,name-anon-globals' -S -passes-ep-pipeline-start='no-op-module' %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-O3,CHECK-EP-PIPELINE-START +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-O3,CHECK-EP-PIPELINE-START ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='thinlto-pre-link,name-anon-globals' -S %s 2>&1 \ -; RUN: | FileCheck %s 
--check-prefixes=CHECK-O,CHECK-Os,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-Os +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Os,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-Os ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='thinlto-pre-link,name-anon-globals' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Oz,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-Oz +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Oz,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-PRELINK-Oz ; RUN: opt -disable-verify -debug-pass-manager -new-pm-debug-info-for-profiling \ ; RUN: -passes='thinlto-pre-link,name-anon-globals' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-DIS,CHECK-O,CHECK-O2,CHECK-PRELINK-O,CHECK-PRELINK-O2 +; RUN: | FileCheck %s --check-prefixes=CHECK-DIS,CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O2 ; ; Postlink pipelines: ; RUN: opt -disable-verify -debug-pass-manager \ @@ -33,19 +33,19 @@ ; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,CHECK-POSTLINK-O,CHECK-POSTLINK-O1 ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-POSTLINK-O,CHECK-POSTLINK-O2 +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-POSTLINK-O,CHECK-POSTLINK-O2 ; RUN: opt -disable-verify -debug-pass-manager -passes-ep-pipeline-start='no-op-module' \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-POSTLINK-O,CHECK-POSTLINK-O3 +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-POSTLINK-O,CHECK-POSTLINK-O3 ; RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Os,CHECK-POSTLINK-O,CHECK-POSTLINK-Os +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Os,CHECK-O23SZ,CHECK-POSTLINK-O,CHECK-POSTLINK-Os ; 
RUN: opt -disable-verify -debug-pass-manager \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Oz,CHECK-POSTLINK-O,CHECK-POSTLINK-Oz +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-Oz,CHECK-O23SZ,CHECK-POSTLINK-O,CHECK-POSTLINK-Oz ; RUN: opt -disable-verify -debug-pass-manager -new-pm-debug-info-for-profiling \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-POSTLINK-O,CHECK-POSTLINK-O2 +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-POSTLINK-O,CHECK-POSTLINK-O2 ; ; CHECK-O: Running analysis: PassInstrumentationAnalysis ; CHECK-O-NEXT: Starting llvm::Module pass manager run. @@ -112,17 +112,17 @@ ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis -; CHECK-O-NEXT: Running pass: SpeculativeExecutionPass -; CHECK-O-NEXT: Running pass: JumpThreadingPass -; CHECK-O-NEXT: Running analysis: LazyValueAnalysis -; CHECK-O-NEXT: Running pass: CorrelatedValuePropagationPass +; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass +; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass +; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis +; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O3-NEXT: Running pass: AggressiveInstCombinePass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O1-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass -; CHECK-O-NEXT: Running pass: TailCallElimPass +; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ReassociatePass ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis @@ -180,14 +180,14 @@ ; CHECK-O-NEXT: Running pass: BDCEPass ; CHECK-O-NEXT: Running 
analysis: DemandedBitsAnalysis ; CHECK-O-NEXT: Running pass: InstCombinePass -; CHECK-O-NEXT: Running pass: JumpThreadingPass -; CHECK-O-NEXT: Running pass: CorrelatedValuePropagationPass -; CHECK-O-NEXT: Running pass: DSEPass -; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass{{.*}}> -; CHECK-O-NEXT: Starting llvm::Function pass manager run -; CHECK-O-NEXT: Running pass: LoopSimplifyPass -; CHECK-O-NEXT: Running pass: LCSSAPass -; CHECK-O-NEXT: Finished llvm::Function pass manager run +; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass +; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass +; CHECK-O23SZ-NEXT: Running pass: DSEPass +; CHECK-O23SZ-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass{{.*}}> +; CHECK-O23SZ-NEXT: Starting llvm::Function pass manager run +; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass +; CHECK-O23SZ-NEXT: Running pass: LCSSAPass +; CHECK-O23SZ-NEXT: Finished llvm::Function pass manager run ; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: SimplifyCFGPass diff --git a/llvm/test/Transforms/MemCpyOpt/lifetime.ll b/llvm/test/Transforms/MemCpyOpt/lifetime.ll index 9ddf3f4f9c290..ad14bdd6df661 100644 --- a/llvm/test/Transforms/MemCpyOpt/lifetime.ll +++ b/llvm/test/Transforms/MemCpyOpt/lifetime.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -O1 -S | FileCheck %s +; RUN: opt < %s -O2 -S | FileCheck %s ; performCallSlotOptzn in MemCpy should not exchange the calls to ; @llvm.lifetime.start and @llvm.memcpy. 
diff --git a/llvm/test/Transforms/PhaseOrdering/simplifycfg-options.ll b/llvm/test/Transforms/PhaseOrdering/simplifycfg-options.ll index 6934623463575..6b3ba66c951eb 100644 --- a/llvm/test/Transforms/PhaseOrdering/simplifycfg-options.ll +++ b/llvm/test/Transforms/PhaseOrdering/simplifycfg-options.ll @@ -7,7 +7,7 @@ define i1 @PR33605(i32 %a, i32 %b, i32* %c) { ; ALL-LABEL: @PR33605( -; ALL-NEXT: for.body: +; ALL-NEXT: entry: ; ALL-NEXT: [[OR:%.*]] = or i32 [[B:%.*]], [[A:%.*]] ; ALL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 1 ; ALL-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 @@ -15,16 +15,16 @@ define i1 @PR33605(i32 %a, i32 %b, i32* %c) { ; ALL-NEXT: br i1 [[CMP]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; ALL: if.then: ; ALL-NEXT: store i32 [[OR]], i32* [[ARRAYIDX]], align 4 -; ALL-NEXT: tail call void @foo() +; ALL-NEXT: call void @foo() ; ALL-NEXT: br label [[IF_END]] ; ALL: if.end: -; ALL-NEXT: [[CHANGED_1_OFF0:%.*]] = phi i1 [ true, [[IF_THEN]] ], [ false, [[FOR_BODY:%.*]] ] +; ALL-NEXT: [[CHANGED_1_OFF0:%.*]] = phi i1 [ true, [[IF_THEN]] ], [ false, [[ENTRY:%.*]] ] ; ALL-NEXT: [[TMP1:%.*]] = load i32, i32* [[C]], align 4 ; ALL-NEXT: [[CMP_1:%.*]] = icmp eq i32 [[OR]], [[TMP1]] ; ALL-NEXT: br i1 [[CMP_1]], label [[IF_END_1:%.*]], label [[IF_THEN_1:%.*]] ; ALL: if.then.1: ; ALL-NEXT: store i32 [[OR]], i32* [[C]], align 4 -; ALL-NEXT: tail call void @foo() +; ALL-NEXT: call void @foo() ; ALL-NEXT: br label [[IF_END_1]] ; ALL: if.end.1: ; ALL-NEXT: [[CHANGED_1_OFF0_1:%.*]] = phi i1 [ true, [[IF_THEN_1]] ], [ [[CHANGED_1_OFF0]], [[IF_END]] ] diff --git a/llvm/test/Transforms/PhaseOrdering/two-shifts-by-sext.ll b/llvm/test/Transforms/PhaseOrdering/two-shifts-by-sext.ll index 4d4a30e00eafb..82f5cfbc9d517 100644 --- a/llvm/test/Transforms/PhaseOrdering/two-shifts-by-sext.ll +++ b/llvm/test/Transforms/PhaseOrdering/two-shifts-by-sext.ll @@ -74,7 +74,7 @@ define i32 @two_shifts_by_same_sext(i32 %val, i8 signext %len) { 
define i32 @two_shifts_by_sext_with_extra_use(i32 %val, i8 signext %len) { ; CHECK-LABEL: @two_shifts_by_sext_with_extra_use( ; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[LEN:%.*]] to i32 -; CHECK-NEXT: tail call void @use_int32(i32 [[CONV]]) +; CHECK-NEXT: call void @use_int32(i32 [[CONV]]) ; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[VAL:%.*]], [[CONV]] ; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[SHL]], [[CONV]] ; CHECK-NEXT: ret i32 [[SHR]] @@ -101,7 +101,7 @@ declare void @use_int32(i32) define i32 @two_shifts_by_same_sext_with_extra_use(i32 %val, i8 signext %len) { ; CHECK-LABEL: @two_shifts_by_same_sext_with_extra_use( ; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[LEN:%.*]] to i32 -; CHECK-NEXT: tail call void @use_int32(i32 [[CONV]]) +; CHECK-NEXT: call void @use_int32(i32 [[CONV]]) ; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[VAL:%.*]], [[CONV]] ; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[SHL]], [[CONV]] ; CHECK-NEXT: ret i32 [[SHR]] From 98189755cd98f6e1e22e03e55b951d3ed53a5ae5 Mon Sep 17 00:00:00 2001 From: czhengsz Date: Mon, 25 Nov 2019 21:18:32 -0500 Subject: [PATCH 088/591] [PowerPC] [NFC] change PPCLoopPreIncPrep class name after D67088. After https://reviews.llvm.org/D67088, PPCLoopPreIncPrep pass can prepare more instruction forms except pre inc form, like DS/DQ forms. This patch is a follow-up of https://reviews.llvm.org/D67088 to rename the pass name. 
Reviewed by: jsji Differential Revision: https://reviews.llvm.org/D70371 --- llvm/lib/Target/PowerPC/PPC.h | 4 +- .../Target/PowerPC/PPCLoopInstrFormPrep.cpp | 46 +++++++++---------- llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 10 ++-- llvm/test/CodeGen/PowerPC/addi-licm.ll | 2 +- llvm/test/CodeGen/PowerPC/ppc-passname.ll | 20 ++++---- 5 files changed, 41 insertions(+), 41 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h index 5a830d2294116..a83509f0e6870 100644 --- a/llvm/lib/Target/PowerPC/PPC.h +++ b/llvm/lib/Target/PowerPC/PPC.h @@ -34,7 +34,7 @@ namespace llvm { #ifndef NDEBUG FunctionPass *createPPCCTRLoopsVerify(); #endif - FunctionPass *createPPCLoopPreIncPrepPass(PPCTargetMachine &TM); + FunctionPass *createPPCLoopInstrFormPrepPass(PPCTargetMachine &TM); FunctionPass *createPPCTOCRegDepsPass(); FunctionPass *createPPCEarlyReturnPass(); FunctionPass *createPPCVSXCopyPass(); @@ -60,7 +60,7 @@ namespace llvm { #ifndef NDEBUG void initializePPCCTRLoopsVerifyPass(PassRegistry&); #endif - void initializePPCLoopPreIncPrepPass(PassRegistry&); + void initializePPCLoopInstrFormPrepPass(PassRegistry&); void initializePPCTOCRegDepsPass(PassRegistry&); void initializePPCEarlyReturnPass(PassRegistry&); void initializePPCVSXCopyPass(PassRegistry&); diff --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp index 72c347e005192..086db4ef9ec90 100644 --- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp +++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp @@ -1,4 +1,4 @@ -//===------ PPCLoopPreIncPrep.cpp - Loop Pre-Inc. AM Prep. Pass -----------===// +//===------ PPCLoopInstrFormPrep.cpp - Loop Instr Form Prep Pass ----------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -41,7 +41,7 @@ // *++p = c; //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ppc-loop-preinc-prep" +#define DEBUG_TYPE "ppc-loop-instr-form-prep" #include "PPC.h" #include "PPCSubtarget.h" @@ -148,16 +148,16 @@ namespace { // For DQ form instructions, their displacements must be multiple of 16. enum InstrForm { UpdateForm = 1, DSForm = 4, DQForm = 16 }; - class PPCLoopPreIncPrep : public FunctionPass { + class PPCLoopInstrFormPrep : public FunctionPass { public: static char ID; // Pass ID, replacement for typeid - PPCLoopPreIncPrep() : FunctionPass(ID) { - initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry()); + PPCLoopInstrFormPrep() : FunctionPass(ID) { + initializePPCLoopInstrFormPrepPass(*PassRegistry::getPassRegistry()); } - PPCLoopPreIncPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) { - initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry()); + PPCLoopInstrFormPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) { + initializePPCLoopInstrFormPrepPass(*PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -237,20 +237,20 @@ namespace { } // end anonymous namespace -char PPCLoopPreIncPrep::ID = 0; -static const char *name = "Prepare loop for pre-inc. 
addressing modes"; -INITIALIZE_PASS_BEGIN(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false) +char PPCLoopInstrFormPrep::ID = 0; +static const char *name = "Prepare loop for ppc preferred instruction forms"; +INITIALIZE_PASS_BEGIN(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) -INITIALIZE_PASS_END(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false) +INITIALIZE_PASS_END(PPCLoopInstrFormPrep, DEBUG_TYPE, name, false, false) static const std::string PHINodeNameSuffix = ".phi"; static const std::string CastNodeNameSuffix = ".cast"; static const std::string GEPNodeIncNameSuffix = ".inc"; static const std::string GEPNodeOffNameSuffix = ".off"; -FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) { - return new PPCLoopPreIncPrep(TM); +FunctionPass *llvm::createPPCLoopInstrFormPrepPass(PPCTargetMachine &TM) { + return new PPCLoopInstrFormPrep(TM); } static bool IsPtrInBounds(Value *BasePtr) { @@ -284,7 +284,7 @@ static Value *GetPointerOperand(Value *MemI) { return nullptr; } -bool PPCLoopPreIncPrep::runOnFunction(Function &F) { +bool PPCLoopInstrFormPrep::runOnFunction(Function &F) { if (skipFunction(F)) return false; @@ -305,7 +305,7 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) { return MadeChange; } -void PPCLoopPreIncPrep::addOneCandidate(Instruction *MemI, const SCEV *LSCEV, +void PPCLoopInstrFormPrep::addOneCandidate(Instruction *MemI, const SCEV *LSCEV, SmallVector &Buckets, unsigned MaxCandidateNum) { assert((MemI && GetPointerOperand(MemI)) && @@ -328,7 +328,7 @@ void PPCLoopPreIncPrep::addOneCandidate(Instruction *MemI, const SCEV *LSCEV, } } -SmallVector PPCLoopPreIncPrep::collectCandidates( +SmallVector PPCLoopInstrFormPrep::collectCandidates( Loop *L, std::function isValidCandidate, unsigned MaxCandidateNum) { @@ -369,7 +369,7 @@ SmallVector PPCLoopPreIncPrep::collectCandidates( return Buckets; } -bool 
PPCLoopPreIncPrep::prepareBaseForDispFormChain(Bucket &BucketChain, +bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain, InstrForm Form) { // RemainderOffsetInfo details: // key: value of (Offset urem DispConstraint). For DSForm, it can @@ -444,7 +444,7 @@ bool PPCLoopPreIncPrep::prepareBaseForDispFormChain(Bucket &BucketChain, // {-32769, 2003, 2007, 2011}, we choose -32769 as base offset, and left disp // for load/stores are {0, 34772, 34776, 34780}. Though each offset now is a // multipler of 4, it cannot be represented by sint16. -bool PPCLoopPreIncPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) { +bool PPCLoopInstrFormPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) { // We have a choice now of which instruction's memory operand we use as the // base for the generated PHI. Always picking the first instruction in each // bucket does not work well, specifically because that instruction might @@ -484,7 +484,7 @@ bool PPCLoopPreIncPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) { return true; } -bool PPCLoopPreIncPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain, +bool PPCLoopInstrFormPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain, SmallSet &BBChanged, InstrForm Form) { bool MadeChange = false; @@ -676,7 +676,7 @@ bool PPCLoopPreIncPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain, return MadeChange; } -bool PPCLoopPreIncPrep::updateFormPrep(Loop *L, +bool PPCLoopInstrFormPrep::updateFormPrep(Loop *L, SmallVector &Buckets) { bool MadeChange = false; if (Buckets.empty()) @@ -695,7 +695,7 @@ bool PPCLoopPreIncPrep::updateFormPrep(Loop *L, return MadeChange; } -bool PPCLoopPreIncPrep::dispFormPrep(Loop *L, SmallVector &Buckets, +bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L, SmallVector &Buckets, InstrForm Form) { bool MadeChange = false; @@ -721,7 +721,7 @@ bool PPCLoopPreIncPrep::dispFormPrep(Loop *L, SmallVector &Buckets, // This function will check to see if that PHI already exists and will return // 
true if it found an existing PHI with the matched start and increment as the // one we wanted to create. -bool PPCLoopPreIncPrep::alreadyPrepared(Loop *L, Instruction* MemI, +bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction* MemI, const SCEV *BasePtrStartSCEV, const SCEVConstant *BasePtrIncSCEV, InstrForm Form) { @@ -787,7 +787,7 @@ bool PPCLoopPreIncPrep::alreadyPrepared(Loop *L, Instruction* MemI, return false; } -bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { +bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) { bool MadeChange = false; // Only prep. the inner-most loop diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index d548e7ace68da..35f6d32a07db2 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -51,8 +51,8 @@ opt DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden, cl::desc("Disable CTR loops for PPC")); static cl:: -opt DisablePreIncPrep("disable-ppc-preinc-prep", cl::Hidden, - cl::desc("Disable PPC loop preinc prep")); +opt DisableInstrFormPrep("disable-ppc-instr-form-prep", cl::Hidden, + cl::desc("Disable PPC loop instr form prep")); static cl::opt VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early", @@ -104,7 +104,7 @@ extern "C" void LLVMInitializePowerPCTarget() { #ifndef NDEBUG initializePPCCTRLoopsVerifyPass(PR); #endif - initializePPCLoopPreIncPrepPass(PR); + initializePPCLoopInstrFormPrepPass(PR); initializePPCTOCRegDepsPass(PR); initializePPCEarlyReturnPass(PR); initializePPCVSXCopyPass(PR); @@ -431,8 +431,8 @@ void PPCPassConfig::addIRPasses() { } bool PPCPassConfig::addPreISel() { - if (!DisablePreIncPrep && getOptLevel() != CodeGenOpt::None) - addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine())); + if (!DisableInstrFormPrep && getOptLevel() != CodeGenOpt::None) + addPass(createPPCLoopInstrFormPrepPass(getPPCTargetMachine())); if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None) 
addPass(createHardwareLoopsPass()); diff --git a/llvm/test/CodeGen/PowerPC/addi-licm.ll b/llvm/test/CodeGen/PowerPC/addi-licm.ll index e0314d19bd3f1..24c9805f1343d 100644 --- a/llvm/test/CodeGen/PowerPC/addi-licm.ll +++ b/llvm/test/CodeGen/PowerPC/addi-licm.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr7 -disable-ppc-preinc-prep < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -disable-ppc-instr-form-prep < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mcpu=pwr7 < %s | FileCheck %s -check-prefix=PIP target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-unknown-linux-gnu" diff --git a/llvm/test/CodeGen/PowerPC/ppc-passname.ll b/llvm/test/CodeGen/PowerPC/ppc-passname.ll index 005f0a25c5637..98343bdb535c2 100644 --- a/llvm/test/CodeGen/PowerPC/ppc-passname.ll +++ b/llvm/test/CodeGen/PowerPC/ppc-passname.ll @@ -1,13 +1,13 @@ -; Test pass name: ppc-loop-preinc-prep. -; RUN: llc -mtriple=powerpc64le-unknown-unknown < %s -debug-pass=Structure -stop-before=ppc-loop-preinc-prep -o /dev/null 2>&1 | FileCheck %s -check-prefix=STOP-BEFORE-LOOP-PREINC-PREP -; STOP-BEFORE-LOOP-PREINC-PREP-NOT: -ppc-loop-preinc-prep -; STOP-BEFORE-LOOP-PREINC-PREP-NOT: "ppc-loop-preinc-prep" pass is not registered. -; STOP-BEFORE-LOOP-PREINC-PREP-NOT: Prepare loop for pre-inc. addressing modes - -; RUN: llc -mtriple=powerpc64le-unknown-unknown < %s -debug-pass=Structure -stop-after=ppc-loop-preinc-prep -o /dev/null 2>&1 | FileCheck %s -check-prefix=STOP-AFTER-LOOP-PREINC-PREP -; STOP-AFTER-LOOP-PREINC-PREP: -ppc-loop-preinc-prep -; STOP-AFTER-LOOP-PREINC-PREP-NOT: "ppc-loop-preinc-prep" pass is not registered. -; STOP-AFTER-LOOP-PREINC-PREP: Prepare loop for pre-inc. addressing modes +; Test pass name: ppc-loop-instr-form-prep. 
+; RUN: llc -mtriple=powerpc64le-unknown-unknown < %s -debug-pass=Structure -stop-before=ppc-loop-instr-form-prep -o /dev/null 2>&1 | FileCheck %s -check-prefix=STOP-BEFORE-LOOP-INSTR-FORM-PREP +; STOP-BEFORE-LOOP-INSTR-FORM-PREP-NOT: -ppc-loop-instr-form-prep +; STOP-BEFORE-LOOP-INSTR-FORM-PREP-NOT: "ppc-loop-instr-form-prep" pass is not registered. +; STOP-BEFORE-LOOP-INSTR-FORM-PREP-NOT: Prepare loop for ppc preferred instruction forms + +; RUN: llc -mtriple=powerpc64le-unknown-unknown < %s -debug-pass=Structure -stop-after=ppc-loop-instr-form-prep -o /dev/null 2>&1 | FileCheck %s -check-prefix=STOP-AFTER-LOOP-INSTR-FORM-PREP +; STOP-AFTER-LOOP-INSTR-FORM-PREP: -ppc-loop-instr-form-prep +; STOP-AFTER-LOOP-INSTR-FORM-PREP-NOT: "ppc-loop-instr-form-prep" pass is not registered. +; STOP-AFTER-LOOP-INSTR-FORM-PREP: Prepare loop for ppc preferred instruction forms ; Test pass name: ppc-toc-reg-deps. From e68b8161781e6de4e0c412609b35723da9706e29 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 27 Nov 2019 09:04:04 +0100 Subject: [PATCH 089/591] Update build_llvm_package.bat to build from the monorepo --- llvm/utils/release/build_llvm_package.bat | 56 +++++++++++------------ 1 file changed, 26 insertions(+), 30 deletions(-) diff --git a/llvm/utils/release/build_llvm_package.bat b/llvm/utils/release/build_llvm_package.bat index 5b0d71fa72ce2..7556bfa854e3d 100755 --- a/llvm/utils/release/build_llvm_package.bat +++ b/llvm/utils/release/build_llvm_package.bat @@ -8,7 +8,7 @@ REM Usage: build_llvm_package.bat REM Prerequisites: REM -REM Visual Studio 2019, CMake, Ninja, SVN, GNUWin32, SWIG, Python 3, +REM Visual Studio 2019, CMake, Ninja, GNUWin32, SWIG, Python 3, REM NSIS with the strlen_8192 patch, REM Visual Studio 2019 SDK and Nuget (for the clang-format plugin), REM Perl (for the OpenMP run-time). 
@@ -24,13 +24,13 @@ set vsdevcmd=C:\Program Files (x86)\Microsoft Visual Studio\2019\Professional\Co set python32_dir=C:\Users\%USERNAME%\AppData\Local\Programs\Python\Python36-32 set python64_dir=C:\Users\%USERNAME%\AppData\Local\Programs\Python\Python36 +for /f "usebackq" %%i in (`PowerShell ^(Get-Date^).ToString^('yyyyMMdd'^)`) do set datestamp=%%i + set revision=%1 -set branch=trunk -set package_version=10.0.0-r%revision% -set clang_format_vs_version=10.0.0.%revision% +set package_version=10.0.0-%revision% +set clang_format_vs_version=10.0.0.%datestamp% set build_dir=llvm_package_%revision% -echo Branch: %branch% echo Revision: %revision% echo Package version: %package_version% echo Clang format plugin version: %clang_format_vs_version% @@ -41,28 +41,24 @@ pause mkdir %build_dir% cd %build_dir% -echo Checking out %branch% at r%revision%... -svn.exe export -r %revision% http://llvm.org/svn/llvm-project/llvm/%branch% llvm || exit /b -svn.exe export -r %revision% http://llvm.org/svn/llvm-project/cfe/%branch% llvm/tools/clang || exit /b -svn.exe export -r %revision% http://llvm.org/svn/llvm-project/clang-tools-extra/%branch% llvm/tools/clang/tools/extra || exit /b -svn.exe export -r %revision% http://llvm.org/svn/llvm-project/lld/%branch% llvm/tools/lld || exit /b -svn.exe export -r %revision% http://llvm.org/svn/llvm-project/compiler-rt/%branch% llvm/projects/compiler-rt || exit /b -svn.exe export -r %revision% http://llvm.org/svn/llvm-project/openmp/%branch% llvm/projects/openmp || exit /b -svn.exe export -r %revision% http://llvm.org/svn/llvm-project/lldb/%branch% llvm/tools/lldb || exit /b - +echo Checking out %revision% +curl -L https://github.com/llvm/llvm-project/archive/%revision%.zip -o src.zip || exit /b +7z x src.zip || exit /b +mv llvm-project-* llvm-project || exit /b REM Setting CMAKE_CL_SHOWINCLUDES_PREFIX to work around PR27226. 
set cmake_flags=^ - -DCMAKE_BUILD_TYPE=Release ^ - -DLLVM_ENABLE_ASSERTIONS=ON ^ - -DLLVM_INSTALL_TOOLCHAIN_ONLY=ON ^ - -DLLVM_BUILD_LLVM_C_DYLIB=ON ^ - -DCMAKE_INSTALL_UCRT_LIBRARIES=ON ^ - -DCLANG_FORMAT_VS_VERSION=%clang_format_vs_version% ^ - -DPACKAGE_VERSION=%package_version% ^ - -DLLDB_RELOCATABLE_PYTHON=1 ^ - -DLLDB_TEST_COMPILER=%cd%\build32_stage0\bin\clang.exe ^ - -DCMAKE_CL_SHOWINCLUDES_PREFIX="Note: including file: " + -DCMAKE_BUILD_TYPE=Release ^ + -DLLVM_ENABLE_ASSERTIONS=ON ^ + -DLLVM_INSTALL_TOOLCHAIN_ONLY=ON ^ + -DLLVM_BUILD_LLVM_C_DYLIB=ON ^ + -DCMAKE_INSTALL_UCRT_LIBRARIES=ON ^ + -DCLANG_FORMAT_VS_VERSION=%clang_format_vs_version% ^ + -DPACKAGE_VERSION=%package_version% ^ + -DLLDB_RELOCATABLE_PYTHON=1 ^ + -DLLDB_TEST_COMPILER=%cd%\build32_stage0\bin\clang.exe ^ + -DCMAKE_CL_SHOWINCLUDES_PREFIX="Note: including file: " ^ + -DLLVM_ENABLE_PROJECTS="clang;clang-tools-extra;lld;compiler-rt;openmp;lldb" REM TODO: Run the "check-all" tests. @@ -72,7 +68,7 @@ set CC= set CXX= mkdir build32_stage0 cd build32_stage0 -cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python32_dir% -DPYTHON_EXECUTABLE=%python32_dir%\python.exe ..\llvm || exit /b +cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python32_dir% -DPYTHON_EXECUTABLE=%python32_dir%\python.exe ..\llvm-project\llvm || exit /b ninja all || ninja all || ninja all || exit /b ninja check || ninja check || ninja check || exit /b ninja check-clang || ninja check-clang || ninja check-clang || exit /b @@ -86,7 +82,7 @@ mkdir build32 cd build32 set CC=..\build32_stage0\bin\clang-cl set CXX=..\build32_stage0\bin\clang-cl -cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python32_dir% -DPYTHON_EXECUTABLE=%python32_dir%\python.exe ..\llvm || exit /b +cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python32_dir% -DPYTHON_EXECUTABLE=%python32_dir%\python.exe ..\llvm-project\llvm || exit /b ninja all || ninja all || ninja all || exit /b ninja check || ninja check || ninja check || exit /b ninja check-clang || ninja check-clang || ninja 
check-clang || exit /b @@ -104,9 +100,9 @@ REM Having VSSDKINSTALL set makes devenv *not* find the SDK for some reason. set VSSDKINSTALL= set CC=..\build32_stage0\bin\clang-cl set CXX=..\build32_stage0\bin\clang-cl -cmake -GNinja %cmake_flags% -DLLVM_USE_CRT_RELEASE=MT -DBUILD_CLANG_FORMAT_VS_PLUGIN=ON -DPYTHON_HOME=%python32_dir% -DPYTHON_EXECUTABLE=%python32_dir%\python.exe ..\llvm || exit /b +cmake -GNinja %cmake_flags% -DLLVM_USE_CRT_RELEASE=MT -DBUILD_CLANG_FORMAT_VS_PLUGIN=ON -DPYTHON_HOME=%python32_dir% -DPYTHON_EXECUTABLE=%python32_dir%\python.exe ..\llvm-project\llvm || exit /b ninja clang_format_vsix || exit /b -copy ..\llvm\tools\clang\tools\clang-format-vs\ClangFormat\bin\Release\ClangFormat.vsix ClangFormat-r%revision%.vsix +copy ..\llvm-project\llvm\tools\clang\tools\clang-format-vs\ClangFormat\bin\Release\ClangFormat.vsix ClangFormat-r%revision%.vsix cd .. @@ -116,7 +112,7 @@ set CC= set CXX= mkdir build64_stage0 cd build64_stage0 -cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python64_dir% -DPYTHON_EXECUTABLE=%python64_dir%\python.exe ..\llvm || exit /b +cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python64_dir% -DPYTHON_EXECUTABLE=%python64_dir%\python.exe ..\llvm-project\llvm || exit /b ninja all || ninja all || ninja all || exit /b ninja check || ninja check || ninja check || exit /b ninja check-clang || ninja check-clang || ninja check-clang || exit /b @@ -130,7 +126,7 @@ mkdir build64 cd build64 set CC=..\build64_stage0\bin\clang-cl set CXX=..\build64_stage0\bin\clang-cl -cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python64_dir% -DPYTHON_EXECUTABLE=%python64_dir%\python.exe ..\llvm || exit /b +cmake -GNinja %cmake_flags% -DPYTHON_HOME=%python64_dir% -DPYTHON_EXECUTABLE=%python64_dir%\python.exe ..\llvm-project\llvm || exit /b ninja all || ninja all || ninja all || exit /b ninja check || ninja check || ninja check || exit /b ninja check-clang || ninja check-clang || ninja check-clang || exit /b From 3a280422b66a31af694782746ec0b5b7552a82a1 Mon Sep 17 
00:00:00 2001 From: Raphael Isemann Date: Wed, 27 Nov 2019 08:09:52 +0100 Subject: [PATCH 090/591] [lldb][NFC] Early exit in DWARFASTParserClang::ParseArrayType --- .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 148 +++++++++--------- 1 file changed, 74 insertions(+), 74 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index fe6ab3064447e..df5c81f2e8305 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -1257,83 +1257,83 @@ TypeSP DWARFASTParserClang::ParseArrayType(const DWARFDIE &die, DWARFDIE type_die = attrs.type.Reference(); Type *element_type = dwarf->ResolveTypeUID(type_die, true); - if (element_type) { - auto array_info = ParseChildArrayInfo(die); - if (array_info) { - attrs.byte_stride = array_info->byte_stride; - attrs.bit_stride = array_info->bit_stride; - } - if (attrs.byte_stride == 0 && attrs.bit_stride == 0) - attrs.byte_stride = element_type->GetByteSize().getValueOr(0); - CompilerType array_element_type = element_type->GetForwardCompilerType(); - - if (ClangASTContext::IsCXXClassType(array_element_type) && - !array_element_type.GetCompleteType()) { - ModuleSP module_sp = die.GetModule(); - if (module_sp) { - if (die.GetCU()->GetProducer() == eProducerClang) - module_sp->ReportError( - "DWARF DW_TAG_array_type DIE at 0x%8.8x has a " - "class/union/struct element type DIE 0x%8.8x that is a " - "forward declaration, not a complete definition.\nTry " - "compiling the source file with -fstandalone-debug or " - "disable -gmodules", - die.GetOffset(), type_die.GetOffset()); - else - module_sp->ReportError( - "DWARF DW_TAG_array_type DIE at 0x%8.8x has a " - "class/union/struct element type DIE 0x%8.8x that is a " - "forward declaration, not a complete definition.\nPlease " - "file a bug against the compiler and include the " - "preprocessed output for %s", - 
die.GetOffset(), type_die.GetOffset(), GetUnitName(die).c_str()); - } - - // We have no choice other than to pretend that the element class - // type is complete. If we don't do this, clang will crash when - // trying to layout the class. Since we provide layout - // assistance, all ivars in this class and other classes will be - // fine, this is the best we can do short of crashing. - if (ClangASTContext::StartTagDeclarationDefinition(array_element_type)) { - ClangASTContext::CompleteTagDeclarationDefinition(array_element_type); - } else { - module_sp->ReportError("DWARF DIE at 0x%8.8x was not able to " - "start its definition.\nPlease file a " - "bug and attach the file at the start " - "of this error message", - type_die.GetOffset()); - } - } + if (!element_type) + return nullptr; - uint64_t array_element_bit_stride = - attrs.byte_stride * 8 + attrs.bit_stride; - CompilerType clang_type; - if (array_info && array_info->element_orders.size() > 0) { - uint64_t num_elements = 0; - auto end = array_info->element_orders.rend(); - for (auto pos = array_info->element_orders.rbegin(); pos != end; ++pos) { - num_elements = *pos; - clang_type = m_ast.CreateArrayType(array_element_type, num_elements, - attrs.is_vector); - array_element_type = clang_type; - array_element_bit_stride = num_elements - ? 
array_element_bit_stride * num_elements - : array_element_bit_stride; - } + llvm::Optional array_info = ParseChildArrayInfo(die); + if (array_info) { + attrs.byte_stride = array_info->byte_stride; + attrs.bit_stride = array_info->bit_stride; + } + if (attrs.byte_stride == 0 && attrs.bit_stride == 0) + attrs.byte_stride = element_type->GetByteSize().getValueOr(0); + CompilerType array_element_type = element_type->GetForwardCompilerType(); + + if (ClangASTContext::IsCXXClassType(array_element_type) && + !array_element_type.GetCompleteType()) { + ModuleSP module_sp = die.GetModule(); + if (module_sp) { + if (die.GetCU()->GetProducer() == eProducerClang) + module_sp->ReportError( + "DWARF DW_TAG_array_type DIE at 0x%8.8x has a " + "class/union/struct element type DIE 0x%8.8x that is a " + "forward declaration, not a complete definition.\nTry " + "compiling the source file with -fstandalone-debug or " + "disable -gmodules", + die.GetOffset(), type_die.GetOffset()); + else + module_sp->ReportError( + "DWARF DW_TAG_array_type DIE at 0x%8.8x has a " + "class/union/struct element type DIE 0x%8.8x that is a " + "forward declaration, not a complete definition.\nPlease " + "file a bug against the compiler and include the " + "preprocessed output for %s", + die.GetOffset(), type_die.GetOffset(), GetUnitName(die).c_str()); + } + + // We have no choice other than to pretend that the element class + // type is complete. If we don't do this, clang will crash when + // trying to layout the class. Since we provide layout + // assistance, all ivars in this class and other classes will be + // fine, this is the best we can do short of crashing. 
+ if (ClangASTContext::StartTagDeclarationDefinition(array_element_type)) { + ClangASTContext::CompleteTagDeclarationDefinition(array_element_type); } else { - clang_type = - m_ast.CreateArrayType(array_element_type, 0, attrs.is_vector); - } - ConstString empty_name; - TypeSP type_sp = std::make_shared( - die.GetID(), dwarf, empty_name, array_element_bit_stride / 8, nullptr, - dwarf->GetUID(type_die), Type::eEncodingIsUID, &attrs.decl, clang_type, - Type::ResolveState::Full); - type_sp->SetEncodingType(element_type); - m_ast.SetMetadataAsUserID(clang_type.GetOpaqueQualType(), die.GetID()); - return type_sp; + module_sp->ReportError("DWARF DIE at 0x%8.8x was not able to " + "start its definition.\nPlease file a " + "bug and attach the file at the start " + "of this error message", + type_die.GetOffset()); + } } - return nullptr; + + uint64_t array_element_bit_stride = + attrs.byte_stride * 8 + attrs.bit_stride; + CompilerType clang_type; + if (array_info && array_info->element_orders.size() > 0) { + uint64_t num_elements = 0; + auto end = array_info->element_orders.rend(); + for (auto pos = array_info->element_orders.rbegin(); pos != end; ++pos) { + num_elements = *pos; + clang_type = m_ast.CreateArrayType(array_element_type, num_elements, + attrs.is_vector); + array_element_type = clang_type; + array_element_bit_stride = num_elements + ? 
array_element_bit_stride * num_elements + : array_element_bit_stride; + } + } else { + clang_type = + m_ast.CreateArrayType(array_element_type, 0, attrs.is_vector); + } + ConstString empty_name; + TypeSP type_sp = std::make_shared( + die.GetID(), dwarf, empty_name, array_element_bit_stride / 8, nullptr, + dwarf->GetUID(type_die), Type::eEncodingIsUID, &attrs.decl, clang_type, + Type::ResolveState::Full); + type_sp->SetEncodingType(element_type); + m_ast.SetMetadataAsUserID(clang_type.GetOpaqueQualType(), die.GetID()); + return type_sp; } TypeSP DWARFASTParserClang::ParsePointerToMemberType( From 344bdeb797b31bb99158010f255a7219fe77e2ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Wed, 16 Oct 2019 11:41:59 +0300 Subject: [PATCH 091/591] [LLDB] Avoid using InitializeContext for zero-initializing a CONTEXT. NFC. InitializeContext is useful for allocating a (potentially variable size) CONTEXT struct in an unaligned byte buffer. In this case, we already have a fixed size CONTEXT we want to initialize, and we only used this as a very roundabout way of zero initializing it. Instead just memset the CONTEXT we have, and set the ContextFlags field manually. This matches how it is done in NativeRegisterContextWindows_*.cpp. This also makes LLDB run successfully in Wine (for a trivial tested case at least), as Wine hasn't implemented the InitializeContext function. 
Differential Revision: https://reviews.llvm.org/D70742 --- .../Process/Windows/Common/RegisterContextWindows.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/lldb/source/Plugins/Process/Windows/Common/RegisterContextWindows.cpp b/lldb/source/Plugins/Process/Windows/Common/RegisterContextWindows.cpp index 28e7a590ff9f3..c3cb45530f2ad 100644 --- a/lldb/source/Plugins/Process/Windows/Common/RegisterContextWindows.cpp +++ b/lldb/source/Plugins/Process/Windows/Common/RegisterContextWindows.cpp @@ -154,15 +154,8 @@ bool RegisterContextWindows::CacheAllRegisterValues() { return true; TargetThreadWindows &wthread = static_cast(m_thread); - uint8_t buffer[2048]; - memset(buffer, 0, sizeof(buffer)); - PCONTEXT tmpContext = NULL; - DWORD contextLength = (DWORD)sizeof(buffer); - if (!::InitializeContext(buffer, kWinContextFlags, &tmpContext, - &contextLength)) { - return false; - } - memcpy(&m_context, tmpContext, sizeof(m_context)); + memset(&m_context, 0, sizeof(m_context)); + m_context.ContextFlags = kWinContextFlags; if (::SuspendThread( wthread.GetHostThread().GetNativeThread().GetSystemHandle()) == (DWORD)-1) { From 943513b79929fba1a9dccdf81cb68a41ce29cd03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Tue, 26 Nov 2019 22:41:40 +0200 Subject: [PATCH 092/591] [X86] [Win64] Avoid truncating large (> 32 bit) stack allocations This fixes PR44129, which was broken in a7adc3185b (in 7.0.0 and newer). 
Differential Revision: https://reviews.llvm.org/D70741 --- llvm/lib/Target/X86/X86FrameLowering.cpp | 2 +- llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index 3374cd054a6e1..799c1f5d1285e 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -1261,7 +1261,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, if (Is64Bit) { // Handle the 64-bit Windows ABI case where we need to call __chkstk. // Function prologue is responsible for adjusting the stack pointer. - int Alloc = isEAXAlive ? NumBytes - 8 : NumBytes; + int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes; if (isUInt<32>(Alloc)) { BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) .addImm(Alloc) diff --git a/llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll b/llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll new file mode 100644 index 0000000000000..9555ce032db90 --- /dev/null +++ b/llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=x86_64-windows-gnu | FileCheck %s + +define void @foo() unnamed_addr #0 { +start: + %b = alloca i64, align 8 + %c = alloca [4294967295 x i8], align 1 + ret void +} + +attributes #0 = { nonlazybind uwtable "probe-stack"="probe_stack" "target-cpu"="x86-64" } + +; CHECK-LABEL: foo: +; CHECK: movabsq $4294967304, %rax +; CHECK-NEXT: callq probe_stack From 47046f05e66c23567d6672ea5a1afd8ed2c411ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Sat, 23 Nov 2019 00:46:24 +0200 Subject: [PATCH 093/591] [MC] Produce proper section relative relocations for COFF in .debug_frame The third parameter to Streamer.EmitSymbolValue() is "bool IsSectionRelative = false". 
For ELF, these debug sections are mapped to address zero, so a normal, absolute address relocation works just fine, but COFF needs a section relative relocation, and COFF is the only target where needsDwarfSectionOffsetDirective() returns true. This matches how EmitSymbolValue is called elsewhere in the same source file. Differential Revision: https://reviews.llvm.org/D70661 --- llvm/lib/MC/MCDwarf.cpp | 3 ++- llvm/test/MC/COFF/cfi-sections.s | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 llvm/test/MC/COFF/cfi-sections.s diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp index bcc7c45afc01b..b4b3c9956cc2d 100644 --- a/llvm/lib/MC/MCDwarf.cpp +++ b/llvm/lib/MC/MCDwarf.cpp @@ -1701,7 +1701,8 @@ void FrameEmitterImpl::EmitFDE(const MCSymbol &cieStart, MakeStartMinusEndExpr(Streamer, SectionStart, cieStart, 0); emitAbsValue(Streamer, offset, 4); } else { - Streamer.EmitSymbolValue(&cieStart, 4); + Streamer.EmitSymbolValue(&cieStart, 4, + asmInfo->needsDwarfSectionOffsetDirective()); } // PC Begin diff --git a/llvm/test/MC/COFF/cfi-sections.s b/llvm/test/MC/COFF/cfi-sections.s new file mode 100644 index 0000000000000..00a8d746c194d --- /dev/null +++ b/llvm/test/MC/COFF/cfi-sections.s @@ -0,0 +1,26 @@ +// RUN: llvm-mc -filetype=obj -triple x86_64-mingw32 %s -o - | llvm-objdump -r - | FileCheck --check-prefix=COFF_X86_64 %s +// RUN: llvm-mc -filetype=obj -triple i686-mingw32 %s -o - | llvm-objdump -r - | FileCheck --check-prefix=COFF_I686 %s + +.cfi_sections .debug_frame + +f1: + .cfi_startproc + nop + .cfi_endproc + +f2: + .cfi_startproc + nop + .cfi_endproc + +// COFF_X86_64: RELOCATION RECORDS FOR [.debug_frame]: +// COFF_X86_64-NEXT: {{.*}} IMAGE_REL_AMD64_SECREL .debug_frame +// COFF_X86_64-NEXT: {{.*}} IMAGE_REL_AMD64_ADDR64 .text +// COFF_X86_64-NEXT: {{.*}} IMAGE_REL_AMD64_SECREL .debug_frame +// COFF_X86_64-NEXT: {{.*}} IMAGE_REL_AMD64_ADDR64 .text + +// COFF_I686: RELOCATION RECORDS FOR 
[.debug_frame]: +// COFF_I686-NEXT: {{.*}} IMAGE_REL_I386_SECREL .debug_frame +// COFF_I686-NEXT: {{.*}} IMAGE_REL_I386_DIR32 .text +// COFF_I686-NEXT: {{.*}} IMAGE_REL_I386_SECREL .debug_frame +// COFF_I686-NEXT: {{.*}} IMAGE_REL_I386_DIR32 .text From d11dc9e77b63b245f4b07fb60b2acae226e3fdcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Mon, 25 Nov 2019 13:59:08 +0200 Subject: [PATCH 094/591] [llvm-objcopy] [COFF] Fix a typo in a comment. NFC. --- llvm/tools/llvm-objcopy/COFF/Object.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/tools/llvm-objcopy/COFF/Object.h b/llvm/tools/llvm-objcopy/COFF/Object.h index a6a3901e9c8d8..78f8da00b8cd4 100644 --- a/llvm/tools/llvm-objcopy/COFF/Object.h +++ b/llvm/tools/llvm-objcopy/COFF/Object.h @@ -124,7 +124,7 @@ struct Object { ArrayRef
getSections() const { return Sections; } // This allows mutating individual Sections, but not mutating the list - // of symbols itself. + // of sections itself. iterator_range::iterator> getMutableSections() { return make_range(Sections.begin(), Sections.end()); } From f1b117394d7f9ae6decf9730ed9d443ca1b54769 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Wed, 27 Nov 2019 09:46:56 +0100 Subject: [PATCH 095/591] [lldb][NFC] Remove unused CompilerType memory functions Summary: All these functions are unused from what I can see. Unless I'm missing something here, this code can go the way of the Dodo. Reviewers: labath Reviewed By: labath Subscribers: abidh, JDevlieghere, lldb-commits Tags: #lldb Differential Revision: https://reviews.llvm.org/D70770 --- lldb/include/lldb/Symbol/CompilerType.h | 8 -- lldb/source/Symbol/CompilerType.cpp | 167 ------------------------ 2 files changed, 175 deletions(-) diff --git a/lldb/include/lldb/Symbol/CompilerType.h b/lldb/include/lldb/Symbol/CompilerType.h index cedd2523a5a89..91d9c5e48d20c 100644 --- a/lldb/include/lldb/Symbol/CompilerType.h +++ b/lldb/include/lldb/Symbol/CompilerType.h @@ -357,14 +357,6 @@ class CompilerType { bool GetValueAsScalar(const DataExtractor &data, lldb::offset_t data_offset, size_t data_byte_size, Scalar &value) const; - bool SetValueFromScalar(const Scalar &value, Stream &strm); - - bool ReadFromMemory(ExecutionContext *exe_ctx, lldb::addr_t addr, - AddressType address_type, DataExtractor &data); - - bool WriteToMemory(ExecutionContext *exe_ctx, lldb::addr_t addr, - AddressType address_type, StreamString &new_value); - void Clear() { m_type = nullptr; m_type_system = nullptr; diff --git a/lldb/source/Symbol/CompilerType.cpp b/lldb/source/Symbol/CompilerType.cpp index 571a8570a43b3..d35213120b4dc 100644 --- a/lldb/source/Symbol/CompilerType.cpp +++ b/lldb/source/Symbol/CompilerType.cpp @@ -874,173 +874,6 @@ bool CompilerType::GetValueAsScalar(const lldb_private::DataExtractor &data, return false; 
} -bool CompilerType::SetValueFromScalar(const Scalar &value, Stream &strm) { - if (!IsValid()) - return false; - - // Aggregate types don't have scalar values - if (!IsAggregateType()) { - strm.GetFlags().Set(Stream::eBinary); - uint64_t count = 0; - lldb::Encoding encoding = GetEncoding(count); - - if (encoding == lldb::eEncodingInvalid || count != 1) - return false; - - llvm::Optional bit_width = GetBitSize(nullptr); - if (!bit_width) - return false; - - // This function doesn't currently handle non-byte aligned assignments - if ((*bit_width % 8) != 0) - return false; - - const uint64_t byte_size = (*bit_width + 7) / 8; - switch (encoding) { - case lldb::eEncodingInvalid: - break; - case lldb::eEncodingVector: - break; - case lldb::eEncodingUint: - switch (byte_size) { - case 1: - strm.PutHex8(value.UInt()); - return true; - case 2: - strm.PutHex16(value.UInt()); - return true; - case 4: - strm.PutHex32(value.UInt()); - return true; - case 8: - strm.PutHex64(value.ULongLong()); - return true; - default: - break; - } - break; - - case lldb::eEncodingSint: - switch (byte_size) { - case 1: - strm.PutHex8(value.SInt()); - return true; - case 2: - strm.PutHex16(value.SInt()); - return true; - case 4: - strm.PutHex32(value.SInt()); - return true; - case 8: - strm.PutHex64(value.SLongLong()); - return true; - default: - break; - } - break; - - case lldb::eEncodingIEEE754: - if (byte_size <= sizeof(long double)) { - if (byte_size == sizeof(float)) { - strm.PutFloat(value.Float()); - return true; - } else if (byte_size == sizeof(double)) { - strm.PutDouble(value.Double()); - return true; - } else if (byte_size == sizeof(long double)) { - strm.PutDouble(value.LongDouble()); - return true; - } - } - break; - } - } - return false; -} - -bool CompilerType::ReadFromMemory(lldb_private::ExecutionContext *exe_ctx, - lldb::addr_t addr, AddressType address_type, - lldb_private::DataExtractor &data) { - if (!IsValid()) - return false; - - // Can't convert a file address to 
anything valid without more context (which - // Module it came from) - if (address_type == eAddressTypeFile) - return false; - - if (!GetCompleteType()) - return false; - - auto byte_size = - GetByteSize(exe_ctx ? exe_ctx->GetBestExecutionContextScope() : nullptr); - if (!byte_size) - return false; - - if (data.GetByteSize() < *byte_size) { - lldb::DataBufferSP data_sp(new DataBufferHeap(*byte_size, '\0')); - data.SetData(data_sp); - } - - uint8_t *dst = const_cast(data.PeekData(0, *byte_size)); - if (dst != nullptr) { - if (address_type == eAddressTypeHost) { - if (addr == 0) - return false; - // The address is an address in this process, so just copy it - memcpy(dst, reinterpret_cast(addr), *byte_size); - return true; - } else { - Process *process = nullptr; - if (exe_ctx) - process = exe_ctx->GetProcessPtr(); - if (process) { - Status error; - return process->ReadMemory(addr, dst, *byte_size, error) == *byte_size; - } - } - } - return false; -} - -bool CompilerType::WriteToMemory(lldb_private::ExecutionContext *exe_ctx, - lldb::addr_t addr, AddressType address_type, - StreamString &new_value) { - if (!IsValid()) - return false; - - // Can't convert a file address to anything valid without more context (which - // Module it came from) - if (address_type == eAddressTypeFile) - return false; - - if (!GetCompleteType()) - return false; - - auto byte_size = - GetByteSize(exe_ctx ? 
exe_ctx->GetBestExecutionContextScope() : nullptr); - if (!byte_size) - return false; - - if (*byte_size > 0) { - if (address_type == eAddressTypeHost) { - // The address is an address in this process, so just copy it - memcpy((void *)addr, new_value.GetData(), *byte_size); - return true; - } else { - Process *process = nullptr; - if (exe_ctx) - process = exe_ctx->GetProcessPtr(); - if (process) { - Status error; - return process->WriteMemory(addr, new_value.GetData(), *byte_size, - error) == *byte_size; - } - } - } - return false; -} - bool lldb_private::operator==(const lldb_private::CompilerType &lhs, const lldb_private::CompilerType &rhs) { return lhs.GetTypeSystem() == rhs.GetTypeSystem() && From e20a1e486e144c88188bc7b420885d5326b39088 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 27 Nov 2019 09:57:32 +0100 Subject: [PATCH 096/591] clang-format-vs : Fix Unicode formatting Use UTF-8 for communication with clang-format and convert the replacements offset/length to characters position/count. Internally VisualStudio.Text.Editor.IWpfTextView use sequence of Unicode characters encoded using UTF-16 and use characters position/count for manipulating text. Resolved "Error while running clang-format: Specified argument was out of the range of valid values. Parameter name: replaceSpan". Patch by empty2fill! 
Differential revision: https://reviews.llvm.org/D70633 --- .../ClangFormat/ClangFormatPackage.cs | 31 ++++++++++++------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/clang/tools/clang-format-vs/ClangFormat/ClangFormatPackage.cs b/clang/tools/clang-format-vs/ClangFormat/ClangFormatPackage.cs index 7443405efad27..26a0af3b55b50 100644 --- a/clang/tools/clang-format-vs/ClangFormat/ClangFormatPackage.cs +++ b/clang/tools/clang-format-vs/ClangFormat/ClangFormatPackage.cs @@ -24,6 +24,7 @@ using System.Runtime.InteropServices; using System.Xml.Linq; using System.Linq; +using System.Text; namespace LLVM.ClangFormat { @@ -292,8 +293,7 @@ private void FormatSelection(OptionPageGrid options) string text = view.TextBuffer.CurrentSnapshot.GetText(); int start = view.Selection.Start.Position.GetContainingLine().Start.Position; int end = view.Selection.End.Position.GetContainingLine().End.Position; - int length = end - start; - + // clang-format doesn't support formatting a range that starts at the end // of the file. 
if (start >= text.Length && text.Length > 0) @@ -301,7 +301,7 @@ private void FormatSelection(OptionPageGrid options) string path = Vsix.GetDocumentParent(view); string filePath = Vsix.GetDocumentPath(view); - RunClangFormatAndApplyReplacements(text, start, length, path, filePath, options, view); + RunClangFormatAndApplyReplacements(text, start, end, path, filePath, options, view); } /// @@ -336,11 +336,11 @@ private void FormatView(IWpfTextView view, OptionPageGrid options) RunClangFormatAndApplyReplacements(text, 0, text.Length, path, filePath, options, view); } - private void RunClangFormatAndApplyReplacements(string text, int offset, int length, string path, string filePath, OptionPageGrid options, IWpfTextView view) + private void RunClangFormatAndApplyReplacements(string text, int start, int end, string path, string filePath, OptionPageGrid options, IWpfTextView view) { try { - string replacements = RunClangFormat(text, offset, length, path, filePath, options); + string replacements = RunClangFormat(text, start, end, path, filePath, options); ApplyClangFormatReplacements(replacements, view); } catch (Exception e) @@ -363,9 +363,9 @@ private void RunClangFormatAndApplyReplacements(string text, int offset, int len /// /// Runs the given text through clang-format and returns the replacements as XML. /// - /// Formats the text range starting at offset of the given length. + /// Formats the text in range start and end. 
/// - private static string RunClangFormat(string text, int offset, int length, string path, string filePath, OptionPageGrid options) + private static string RunClangFormat(string text, int start, int end, string path, string filePath, OptionPageGrid options) { string vsixPath = Path.GetDirectoryName( typeof(ClangFormatPackage).Assembly.Location); @@ -373,6 +373,9 @@ private static string RunClangFormat(string text, int offset, int length, string System.Diagnostics.Process process = new System.Diagnostics.Process(); process.StartInfo.UseShellExecute = false; process.StartInfo.FileName = vsixPath + "\\clang-format.exe"; + char[] chars = text.ToCharArray(); + int offset = Encoding.UTF8.GetByteCount(chars, 0, start); + int length = Encoding.UTF8.GetByteCount(chars, 0, end) - offset; // Poor man's escaping - this will not work when quotes are already escaped // in the input (but we don't need more). string style = options.Style.Replace("\"", "\\\""); @@ -413,10 +416,11 @@ private static string RunClangFormat(string text, int offset, int length, string // 2. We write everything to the standard output - this cannot block, as clang-format // reads the full standard input before analyzing it without writing anything to the // standard output. - process.StandardInput.Write(text); + StreamWriter utf8Writer = new StreamWriter(process.StandardInput.BaseStream, new UTF8Encoding(false)); + utf8Writer.Write(text); // 3. We notify clang-format that the input is done - after this point clang-format // will start analyzing the input and eventually write the output. - process.StandardInput.Close(); + utf8Writer.Close(); // 4. We must read clang-format's output before waiting for it to exit; clang-format // will close the channel by exiting. 
string output = process.StandardOutput.ReadToEnd(); @@ -440,13 +444,18 @@ private static void ApplyClangFormatReplacements(string replacements, IWpfTextVi if (replacements.Length == 0) return; + string text = view.TextBuffer.CurrentSnapshot.GetText(); + byte[] bytes = Encoding.UTF8.GetBytes(text); + var root = XElement.Parse(replacements); var edit = view.TextBuffer.CreateEdit(); foreach (XElement replacement in root.Descendants("replacement")) { + int offset = int.Parse(replacement.Attribute("offset").Value); + int length = int.Parse(replacement.Attribute("length").Value); var span = new Span( - int.Parse(replacement.Attribute("offset").Value), - int.Parse(replacement.Attribute("length").Value)); + Encoding.UTF8.GetCharCount(bytes, 0, offset), + Encoding.UTF8.GetCharCount(bytes, offset, length)); edit.Replace(span, replacement.Value); } edit.Apply(); From 92d5ea5d1674c38e03d130c6b04afa118e94ef4a Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Wed, 27 Nov 2019 10:27:25 +0100 Subject: [PATCH 097/591] [lldb][NFC] Move TypeSystem RTTI to static variable to remove swift reference --- lldb/include/lldb/Symbol/ClangASTContext.h | 8 ++-- lldb/include/lldb/Symbol/TypeSystem.h | 43 ++-------------------- lldb/source/Symbol/ClangASTContext.cpp | 11 +++--- lldb/source/Symbol/CompilerDecl.cpp | 3 +- lldb/source/Symbol/CompilerDeclContext.cpp | 3 +- lldb/source/Symbol/TypeSystem.cpp | 2 - 6 files changed, 17 insertions(+), 53 deletions(-) diff --git a/lldb/include/lldb/Symbol/ClangASTContext.h b/lldb/include/lldb/Symbol/ClangASTContext.h index f4428c6821825..20421bca305ed 100644 --- a/lldb/include/lldb/Symbol/ClangASTContext.h +++ b/lldb/include/lldb/Symbol/ClangASTContext.h @@ -41,15 +41,17 @@ namespace lldb_private { class Declaration; class ClangASTContext : public TypeSystem { + // LLVM RTTI support + static char ID; + public: typedef void (*CompleteTagDeclCallback)(void *baton, clang::TagDecl *); typedef void (*CompleteObjCInterfaceDeclCallback)(void *baton, 
clang::ObjCInterfaceDecl *); // llvm casting support - static bool classof(const TypeSystem *ts) { - return ts->getKind() == TypeSystem::eKindClang; - } + bool isA(const void *ClassID) const override { return ClassID == &ID; } + static bool classof(const TypeSystem *ts) { return ts->isA(&ID); } // Constructors and Destructors explicit ClangASTContext(llvm::StringRef triple = ""); diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h index 6283d67baba52..ea860647fdb1c 100644 --- a/lldb/include/lldb/Symbol/TypeSystem.h +++ b/lldb/include/lldb/Symbol/TypeSystem.h @@ -52,47 +52,11 @@ struct LanguageSet { /// Interface for representing the Type Systems in different languages. class TypeSystem : public PluginInterface { public: - // Intrusive type system that allows us to use llvm casting. - // - // To add a new type system: - // - // 1 - Add a new enumeration for llvm casting below for your TypeSystem - // subclass, here we will use eKindFoo - // - // 2 - Your TypeSystem subclass will inherit from TypeSystem and needs - // to implement a static classof() function that returns your - // enumeration: - // - // class Foo : public lldb_private::TypeSystem - // { - // static bool classof(const TypeSystem *ts) - // { - // return ts->getKind() == TypeSystem::eKindFoo; - // } - // }; - // - // 3 - Contruct your TypeSystem subclass with the enumeration from below - // - // Foo() : - // TypeSystem(TypeSystem::eKindFoo), - // ... - // { - // } - // - // Then you can use the llvm casting on any "TypeSystem *" to get an instance - // of your subclass. 
- enum LLVMCastKind { - eKindClang, - eKindSwift, - kNumKinds - }; - // Constructors and Destructors - TypeSystem(LLVMCastKind kind); - ~TypeSystem() override; - LLVMCastKind getKind() const { return m_kind; } + // LLVM RTTI support + virtual bool isA(const void *ClassID) const = 0; static lldb::TypeSystemSP CreateInstance(lldb::LanguageType language, Module *module); @@ -493,8 +457,7 @@ class TypeSystem : public PluginInterface { virtual bool IsMeaninglessWithoutDynamicResolution(void *type); protected: - const LLVMCastKind m_kind; // Support for llvm casting - SymbolFile *m_sym_file; + SymbolFile *m_sym_file = nullptr; }; class TypeSystemMap { diff --git a/lldb/source/Symbol/ClangASTContext.cpp b/lldb/source/Symbol/ClangASTContext.cpp index 244ac8ce5ff87..e413029f03005 100644 --- a/lldb/source/Symbol/ClangASTContext.cpp +++ b/lldb/source/Symbol/ClangASTContext.cpp @@ -337,6 +337,8 @@ static ClangASTMap &GetASTMap() { return *g_map_ptr; } +char ClangASTContext::ID; + bool ClangASTContext::IsOperator(llvm::StringRef name, clang::OverloadedOperatorKind &op_kind) { // All operators have to start with "operator". @@ -522,8 +524,7 @@ static void ParseLangArgs(LangOptions &Opts, InputKind IK, const char *triple) { Opts.NoInlineDefine = !Opt; } -ClangASTContext::ClangASTContext(llvm::StringRef target_triple) - : TypeSystem(TypeSystem::eKindClang) { +ClangASTContext::ClangASTContext(llvm::StringRef target_triple) { if (!target_triple.empty()) SetTargetTriple(target_triple); // The caller didn't pass an ASTContext so create a new one for this @@ -531,16 +532,14 @@ ClangASTContext::ClangASTContext(llvm::StringRef target_triple) CreateASTContext(); } -ClangASTContext::ClangASTContext(ArchSpec arch) - : TypeSystem(TypeSystem::eKindClang) { +ClangASTContext::ClangASTContext(ArchSpec arch) { SetTargetTriple(arch.GetTriple().str()); // The caller didn't pass an ASTContext so create a new one for this // ClangASTContext. 
CreateASTContext(); } -ClangASTContext::ClangASTContext(ASTContext &existing_ctxt) - : TypeSystem(TypeSystem::eKindClang) { +ClangASTContext::ClangASTContext(ASTContext &existing_ctxt) { SetTargetTriple(existing_ctxt.getTargetInfo().getTriple().str()); m_ast_up.reset(&existing_ctxt); diff --git a/lldb/source/Symbol/CompilerDecl.cpp b/lldb/source/Symbol/CompilerDecl.cpp index 2c64113a2bbeb..3d17d802dd044 100644 --- a/lldb/source/Symbol/CompilerDecl.cpp +++ b/lldb/source/Symbol/CompilerDecl.cpp @@ -7,13 +7,14 @@ //===----------------------------------------------------------------------===// #include "lldb/Symbol/CompilerDecl.h" +#include "lldb/Symbol/ClangASTContext.h" #include "lldb/Symbol/CompilerDeclContext.h" #include "lldb/Symbol/TypeSystem.h" using namespace lldb_private; bool CompilerDecl::IsClang() const { - return IsValid() && m_type_system->getKind() == TypeSystem::eKindClang; + return IsValid() && llvm::isa(m_type_system); } ConstString CompilerDecl::GetName() const { diff --git a/lldb/source/Symbol/CompilerDeclContext.cpp b/lldb/source/Symbol/CompilerDeclContext.cpp index a6f046c4eb22e..7d45f47ad133c 100644 --- a/lldb/source/Symbol/CompilerDeclContext.cpp +++ b/lldb/source/Symbol/CompilerDeclContext.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "lldb/Symbol/CompilerDeclContext.h" +#include "lldb/Symbol/ClangASTContext.h" #include "lldb/Symbol/CompilerDecl.h" #include "lldb/Symbol/TypeSystem.h" #include @@ -24,7 +25,7 @@ CompilerDeclContext::FindDeclByName(ConstString name, } bool CompilerDeclContext::IsClang() const { - return IsValid() && m_type_system->getKind() == TypeSystem::eKindClang; + return IsValid() && llvm::isa(m_type_system); } ConstString CompilerDeclContext::GetName() const { diff --git a/lldb/source/Symbol/TypeSystem.cpp b/lldb/source/Symbol/TypeSystem.cpp index c63f24aea3354..4e746bd18e1f3 100644 --- a/lldb/source/Symbol/TypeSystem.cpp +++ b/lldb/source/Symbol/TypeSystem.cpp 
@@ -43,8 +43,6 @@ size_t LanguageSet::Size() const { return bitvector.count(); } bool LanguageSet::Empty() const { return bitvector.none(); } bool LanguageSet::operator[](unsigned i) const { return bitvector[i]; } -TypeSystem::TypeSystem(LLVMCastKind kind) : m_kind(kind), m_sym_file(nullptr) {} - TypeSystem::~TypeSystem() {} static lldb::TypeSystemSP CreateInstanceHelper(lldb::LanguageType language, From 3b35603a56b2dcc7fb0480d8d4c4aeacacecf1fb Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Tue, 26 Nov 2019 17:47:34 +0300 Subject: [PATCH 098/591] [llvm-readobj] - Always print "Predecessors" for version definition sections. This is a follow-up discussed in D70495 thread. The current logic is unusual for llvm-readobj. It doesn't print predecessors list when it is empty. This is not good for machine parsers. D70495 had to add this condition during refactoring to reduce amount of changes, in tests, because the original code also had a similar logic. Now seems it is time to get rid of it. This patch does it. 
Differential revision: https://reviews.llvm.org/D70717 --- lld/test/ELF/verdef-defaultver.s | 3 +++ lld/test/ELF/verdef-dependency.s | 4 ++++ lld/test/ELF/verdef.s | 4 ++++ llvm/test/tools/llvm-readobj/elf-versioninfo.test | 6 ++++++ llvm/test/tools/yaml2obj/ELF/verdef-section.yaml | 2 ++ llvm/tools/llvm-readobj/ELFDumper.cpp | 3 --- 6 files changed, 19 insertions(+), 3 deletions(-) diff --git a/lld/test/ELF/verdef-defaultver.s b/lld/test/ELF/verdef-defaultver.s index 3c10f2dcfe26f..7d2a0d27fa11d 100644 --- a/lld/test/ELF/verdef-defaultver.s +++ b/lld/test/ELF/verdef-defaultver.s @@ -84,6 +84,7 @@ # DSO-NEXT: Index: 1 # DSO-NEXT: Hash: 127830196 # DSO-NEXT: Name: shared +# DSO-NEXT: Predecessors: [] # DSO-NEXT: } # DSO-NEXT: Definition { # DSO-NEXT: Version: 1 @@ -92,6 +93,7 @@ # DSO-NEXT: Index: 2 # DSO-NEXT: Hash: 1425 # DSO-NEXT: Name: V1 +# DSO-NEXT: Predecessors: [] # DSO-NEXT: } # DSO-NEXT: Definition { # DSO-NEXT: Version: 1 @@ -100,6 +102,7 @@ # DSO-NEXT: Index: 3 # DSO-NEXT: Hash: 1426 # DSO-NEXT: Name: V2 +# DSO-NEXT: Predecessors: [] # DSO-NEXT: } # DSO-NEXT: ] diff --git a/lld/test/ELF/verdef-dependency.s b/lld/test/ELF/verdef-dependency.s index 479f332d49306..d716436202535 100644 --- a/lld/test/ELF/verdef-dependency.s +++ b/lld/test/ELF/verdef-dependency.s @@ -15,6 +15,7 @@ # DSO-NEXT: Index: 1 # DSO-NEXT: Hash: 127830196 # DSO-NEXT: Name: shared +# DSO-NEXT: Predecessors: [] # DSO-NEXT: } # DSO-NEXT: Definition { # DSO-NEXT: Version: 1 @@ -23,6 +24,7 @@ # DSO-NEXT: Index: 2 # DSO-NEXT: Hash: 98457184 # DSO-NEXT: Name: LIBSAMPLE_1.0 +# DSO-NEXT: Predecessors: [] # DSO-NEXT: } # DSO-NEXT: Definition { # DSO-NEXT: Version: 1 @@ -31,6 +33,7 @@ # DSO-NEXT: Index: 3 # DSO-NEXT: Hash: 98456416 # DSO-NEXT: Name: LIBSAMPLE_2.0 +# DSO-NEXT: Predecessors: [] # DSO-NEXT: } # DSO-NEXT: Definition { # DSO-NEXT: Version: 1 @@ -39,5 +42,6 @@ # DSO-NEXT: Index: 4 # DSO-NEXT: Hash: 98456672 # DSO-NEXT: Name: LIBSAMPLE_3.0 +# DSO-NEXT: Predecessors: [] # DSO-NEXT: } # 
DSO-NEXT: ] diff --git a/lld/test/ELF/verdef.s b/lld/test/ELF/verdef.s index d2aa924782f86..dd1f1d41f0148 100644 --- a/lld/test/ELF/verdef.s +++ b/lld/test/ELF/verdef.s @@ -33,6 +33,7 @@ # DSO-NEXT: Index: 1 # DSO-NEXT: Hash: 127830196 # DSO-NEXT: Name: shared +# DSO-NEXT: Predecessors: [] # DSO-NEXT: } # DSO-NEXT: Definition { # DSO-NEXT: Version: 1 @@ -41,6 +42,7 @@ # DSO-NEXT: Index: 2 # DSO-NEXT: Hash: 98457184 # DSO-NEXT: Name: LIBSAMPLE_1.0 +# DSO-NEXT: Predecessors: [] # DSO-NEXT: } # DSO-NEXT: Definition { # DSO-NEXT: Version: 1 @@ -49,6 +51,7 @@ # DSO-NEXT: Index: 3 # DSO-NEXT: Hash: 98456416 # DSO-NEXT: Name: LIBSAMPLE_2.0 +# DSO-NEXT: Predecessors: [] # DSO-NEXT: } # DSO-NEXT: Definition { # DSO-NEXT: Version: 1 @@ -57,6 +60,7 @@ # DSO-NEXT: Index: 4 # DSO-NEXT: Hash: 98456672 # DSO-NEXT: Name: LIBSAMPLE_3.0 +# DSO-NEXT: Predecessors: [] # DSO-NEXT: } # DSO-NEXT: ] # DSO-NEXT: VersionRequirements [ diff --git a/llvm/test/tools/llvm-readobj/elf-versioninfo.test b/llvm/test/tools/llvm-readobj/elf-versioninfo.test index c732f7d2a2b6c..591dfe27a1a86 100644 --- a/llvm/test/tools/llvm-readobj/elf-versioninfo.test +++ b/llvm/test/tools/llvm-readobj/elf-versioninfo.test @@ -148,6 +148,7 @@ DynamicSymbols: # LLVM-NEXT: Index: 0 # LLVM-NEXT: Hash: 0 # LLVM-NEXT: Name: VERSION1 +# LLVM-NEXT: Predecessors: [] # LLVM-NEXT: } # LLVM-NEXT: Definition { # LLVM-NEXT: Version: 1 @@ -157,6 +158,7 @@ DynamicSymbols: # LLVM-NEXT: Index: 0 # LLVM-NEXT: Hash: 0 # LLVM-NEXT: Name: VERSION1 +# LLVM-NEXT: Predecessors: [] # LLVM-NEXT: } # LLVM-NEXT: Definition { # LLVM-NEXT: Version: 1 @@ -166,6 +168,7 @@ DynamicSymbols: # LLVM-NEXT: Index: 0 # LLVM-NEXT: Hash: 0 # LLVM-NEXT: Name: VERSION1 +# LLVM-NEXT: Predecessors: [] # LLVM-NEXT: } # LLVM-NEXT: Definition { # LLVM-NEXT: Version: 1 @@ -175,6 +178,7 @@ DynamicSymbols: # LLVM-NEXT: Index: 0 # LLVM-NEXT: Hash: 0 # LLVM-NEXT: Name: VERSION1 +# LLVM-NEXT: Predecessors: [] # LLVM-NEXT: } # LLVM-NEXT: Definition { # LLVM-NEXT: 
Version: 1 @@ -186,6 +190,7 @@ DynamicSymbols: # LLVM-NEXT: Index: 2 # LLVM-NEXT: Hash: 175630257 # LLVM-NEXT: Name: VERSION1 +# LLVM-NEXT: Predecessors: [] # LLVM-NEXT: } # LLVM-NEXT: Definition { # LLVM-NEXT: Version: 1 @@ -436,6 +441,7 @@ DynamicSymbols: # PAST-STRTAB-END-LLVM-NEXT: Index: 0 # PAST-STRTAB-END-LLVM-NEXT: Hash: 0 # PAST-STRTAB-END-LLVM-NEXT: Name: +# PAST-STRTAB-END-LLVM-NEXT: Predecessors: [] # PAST-STRTAB-END-LLVM-NEXT: } # PAST-STRTAB-END-LLVM-NEXT: ] diff --git a/llvm/test/tools/yaml2obj/ELF/verdef-section.yaml b/llvm/test/tools/yaml2obj/ELF/verdef-section.yaml index 960acbdb74043..23bbc8ee791c0 100644 --- a/llvm/test/tools/yaml2obj/ELF/verdef-section.yaml +++ b/llvm/test/tools/yaml2obj/ELF/verdef-section.yaml @@ -12,6 +12,7 @@ # CHECK-NEXT: Index: 1 # CHECK-NEXT: Hash: 170240160 # CHECK-NEXT: Name: dso.so.0 +# CHECK-NEXT: Predecessors: [] # CHECK-NEXT: } # CHECK-NEXT: Definition { # CHECK-NEXT: Version: 1 @@ -21,6 +22,7 @@ # CHECK-NEXT: Index: 2 # CHECK-NEXT: Hash: 108387921 # CHECK-NEXT: Name: VERSION_1 +# CHECK-NEXT: Predecessors: [] # CHECK-NEXT: } # CHECK-NEXT: Definition { # CHECK-NEXT: Version: 1 diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 5127939f0180a..b7bd35e7c95cb 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -5826,9 +5826,6 @@ void LLVMStyle::printVersionDefinitionSection(const ELFFile *Obj, W.printNumber("Index", D.Ndx); W.printNumber("Hash", D.Hash); W.printString("Name", D.Name.c_str()); - - if (D.AuxV.empty()) - continue; W.printList( "Predecessors", D.AuxV, [](raw_ostream &OS, const VerdAux &Aux) { OS << Aux.Name.c_str(); }); From 31c25fadccec0d5e5f0048b551b99f8976b87eb0 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 27 Nov 2019 10:50:16 +0000 Subject: [PATCH 099/591] AArch64: support the Apple NEON syntax for v8.2 crypto instructions. Very simple change, just adding the extra syntax variant. 
--- .../lib/Target/AArch64/AArch64InstrFormats.td | 26 +++++++----- llvm/test/MC/AArch64/armv8.2a-crypto-apple.s | 41 +++++++++++++++++++ 2 files changed, 56 insertions(+), 11 deletions(-) create mode 100644 llvm/test/MC/AArch64/armv8.2a-crypto-apple.s diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index d6bf9bcd805ca..5da790c846179 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -10407,9 +10407,9 @@ class CryptoRRTiedop0, bits<2>op1, string asm, string asmops> let Inst{11-10} = op1; } class CryptoRRTied_2Dop0, bits<2>op1, string asm> - : CryptoRRTied; + : CryptoRRTied; class CryptoRRTied_4Sop0, bits<2>op1, string asm> - : CryptoRRTied; + : CryptoRRTied; class CryptoRRR op0, bits<2>op1, dag oops, dag iops, string asm, string asmops, string cst> @@ -10424,19 +10424,19 @@ class CryptoRRR op0, bits<2>op1, dag oops, dag iops, string asm, } class CryptoRRR_2D op0, bits<2>op1, string asm> : CryptoRRR; + "{\t$Vd.2d, $Vn.2d, $Vm.2d|.2d\t$Vd, $Vn, $Vm}", "">; class CryptoRRRTied_2D op0, bits<2>op1, string asm> : CryptoRRR; + "{\t$Vd.2d, $Vn.2d, $Vm.2d|.2d\t$Vd, $Vn, $Vm}", "$Vd = $Vdst">; class CryptoRRR_4S op0, bits<2>op1, string asm> : CryptoRRR; + "{\t$Vd.4s, $Vn.4s, $Vm.4s|.4s\t$Vd, $Vn, $Vm}", "">; class CryptoRRRTied_4S op0, bits<2>op1, string asm> : CryptoRRR; + "{\t$Vd.4s, $Vn.4s, $Vm.4s|.4s\t$Vd, $Vn, $Vm}", "$Vd = $Vdst">; class CryptoRRRTied op0, bits<2>op1, string asm> : CryptoRRR; + asm, "{\t$Vd, $Vn, $Vm.2d|.2d\t$Vd, $Vn, $Vm}", "$Vd = $Vdst">; class CryptoRRRRop0, string asm, string asmops> : BaseCryptoV82<(outs V128:$Vd), (ins V128:$Vn, V128:$Vm, V128:$Va), asm, @@ -10450,15 +10450,18 @@ class CryptoRRRRop0, string asm, string asmops> let Inst{14-10} = Va; } class CryptoRRRR_16Bop0, string asm> - : CryptoRRRR { + : CryptoRRRR { } class CryptoRRRR_4Sop0, string asm> - : CryptoRRRR { + : CryptoRRRR { } class CryptoRRRi6 : 
BaseCryptoV82<(outs V128:$Vd), (ins V128:$Vn, V128:$Vm, uimm6:$imm), asm, - "{\t$Vd.2d, $Vn.2d, $Vm.2d, $imm}", "", []> { + "{\t$Vd.2d, $Vn.2d, $Vm.2d, $imm" # + "|.2d\t$Vd, $Vn, $Vm, $imm}", "", []> { bits<6> imm; bits<5> Vm; let Inst{24-21} = 0b0100; @@ -10471,7 +10474,8 @@ class CryptoRRRi6 class CryptoRRRi2Tiedop0, bits<2>op1, string asm> : BaseCryptoV82<(outs V128:$Vdst), (ins V128:$Vd, V128:$Vn, V128:$Vm, VectorIndexS:$imm), - asm, "{\t$Vd.4s, $Vn.4s, $Vm.s$imm}", "$Vd = $Vdst", []> { + asm, "{\t$Vd.4s, $Vn.4s, $Vm.s$imm" # + "|.4s\t$Vd, $Vn, $Vm$imm}", "$Vd = $Vdst", []> { bits<2> imm; bits<5> Vm; let Inst{24-21} = 0b0010; diff --git a/llvm/test/MC/AArch64/armv8.2a-crypto-apple.s b/llvm/test/MC/AArch64/armv8.2a-crypto-apple.s new file mode 100644 index 0000000000000..1b9153136d057 --- /dev/null +++ b/llvm/test/MC/AArch64/armv8.2a-crypto-apple.s @@ -0,0 +1,41 @@ +// RUN: llvm-mc -output-asm-variant=1 -triple aarch64-apple-ios -mattr=+sha3,+sm4 -show-encoding < %s | FileCheck %s + + sha512h.2d q0, q1, v2 + sha512h2.2d q0, q1, v2 + sha512su0.2d v11, v12 + sha512su1.2d v11, v13, v14 + eor3.16b v25, v12, v7, v2 + rax1.2d v30, v29, v26 + xar.2d v26, v21, v27, #63 + bcax.16b v31, v26, v2, v1 + +//CHECK: sha512h.2d q0, q1, v2 ; encoding: [0x20,0x80,0x62,0xce] +//CHECK: sha512h2.2d q0, q1, v2 ; encoding: [0x20,0x84,0x62,0xce] +//CHECK: sha512su0.2d v11, v12 ; encoding: [0x8b,0x81,0xc0,0xce] +//CHECK: sha512su1.2d v11, v13, v14 ; encoding: [0xab,0x89,0x6e,0xce] +//CHECK: eor3.16b v25, v12, v7, v2 ; encoding: [0x99,0x09,0x07,0xce] +//CHECK: rax1.2d v30, v29, v26 ; encoding: [0xbe,0x8f,0x7a,0xce] +//CHECK: xar.2d v26, v21, v27, #63 ; encoding: [0xba,0xfe,0x9b,0xce] +//CHECK: bcax.16b v31, v26, v2, v1 ; encoding: [0x5f,0x07,0x22,0xce] + + + + sm3ss1.4s v20, v23, v21, v22 + sm3tt1a.4s v20, v23, v21[3] + sm3tt1b.4s v20, v23, v21[3] + sm3tt2a.4s v20, v23, v21[3] + sm3tt2b.4s v20, v23, v21[3] + sm3partw1.4s v30, v29, v26 + sm3partw2.4s v30, v29, v26 + sm4ekey.4s v11, v11, 
v19 + sm4e.4s v2, v15 + +// CHECK: sm3ss1.4s v20, v23, v21, v22 ; encoding: [0xf4,0x5a,0x55,0xce] +// CHECK: sm3tt1a.4s v20, v23, v21[3] ; encoding: [0xf4,0xb2,0x55,0xce] +// CHECK: sm3tt1b.4s v20, v23, v21[3] ; encoding: [0xf4,0xb6,0x55,0xce] +// CHECK: sm3tt2a.4s v20, v23, v21[3] ; encoding: [0xf4,0xba,0x55,0xce] +// CHECK: sm3tt2b.4s v20, v23, v21[3] ; encoding: [0xf4,0xbe,0x55,0xce] +// CHECK: sm3partw1.4s v30, v29, v26 ; encoding: [0xbe,0xc3,0x7a,0xce] +// CHECK: sm3partw2.4s v30, v29, v26 ; encoding: [0xbe,0xc7,0x7a,0xce] +// CHECK: sm4ekey.4s v11, v11, v19 ; encoding: [0x6b,0xc9,0x73,0xce] +// CHECK: sm4e.4s v2, v15 ; encoding: [0xe2,0x85,0xc0,0xce] From 19ac0eaf07e60173baa7ee77fa11568c30b87455 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Mon, 25 Nov 2019 19:51:07 +0100 Subject: [PATCH 100/591] [clangd] Shutdown cleanly on signals. Summary: This avoids leaking PCH files if editors don't use the LSP shutdown protocol. This is one fix for https://github.com/clangd/clangd/issues/209 (Though I think we should *also* be unlinking the files) Reviewers: kadircet, jfb Subscribers: mgorny, ilya-biryukov, MaskRay, jkorous, arphaman, jfb, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D70684 --- clang-tools-extra/clangd/CMakeLists.txt | 1 + clang-tools-extra/clangd/JSONTransport.cpp | 22 +++-- clang-tools-extra/clangd/Shutdown.cpp | 39 +++++++++ clang-tools-extra/clangd/Shutdown.h | 84 +++++++++++++++++++ clang-tools-extra/clangd/test/exit-eof.test | 7 ++ .../clangd/test/exit-signal.test | 32 +++++++ clang-tools-extra/clangd/tool/ClangdMain.cpp | 17 ++-- 7 files changed, 190 insertions(+), 12 deletions(-) create mode 100644 clang-tools-extra/clangd/Shutdown.cpp create mode 100644 clang-tools-extra/clangd/Shutdown.h create mode 100644 clang-tools-extra/clangd/test/exit-eof.test create mode 100644 clang-tools-extra/clangd/test/exit-signal.test diff --git a/clang-tools-extra/clangd/CMakeLists.txt 
b/clang-tools-extra/clangd/CMakeLists.txt index 8ab2ae6b91d3a..c1aea3bd119d1 100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -69,6 +69,7 @@ add_clang_library(clangDaemon Selection.cpp SemanticHighlighting.cpp SemanticSelection.cpp + Shutdown.cpp SourceCode.cpp QueryDriverDatabase.cpp Threading.cpp diff --git a/clang-tools-extra/clangd/JSONTransport.cpp b/clang-tools-extra/clangd/JSONTransport.cpp index 4921035b6dbb3..6351b8056b3fa 100644 --- a/clang-tools-extra/clangd/JSONTransport.cpp +++ b/clang-tools-extra/clangd/JSONTransport.cpp @@ -7,8 +7,10 @@ //===----------------------------------------------------------------------===// #include "Logger.h" #include "Protocol.h" // For LSPError +#include "Shutdown.h" #include "Transport.h" #include "llvm/Support/Errno.h" +#include "llvm/Support/Error.h" namespace clang { namespace clangd { @@ -81,6 +83,10 @@ class JSONTransport : public Transport { llvm::Error loop(MessageHandler &Handler) override { while (!feof(In)) { + if (shutdownRequested()) + return llvm::createStringError( + std::make_error_code(std::errc::operation_canceled), + "Got signal, shutting down"); if (ferror(In)) return llvm::errorCodeToError( std::error_code(errno, std::system_category())); @@ -167,7 +173,7 @@ bool JSONTransport::handleMessage(llvm::json::Value Message, } // Tries to read a line up to and including \n. -// If failing, feof() or ferror() will be set. +// If failing, feof(), ferror(), or shutdownRequested() will be set. bool readLine(std::FILE *In, std::string &Out) { static constexpr int BufSize = 1024; size_t Size = 0; @@ -175,7 +181,8 @@ bool readLine(std::FILE *In, std::string &Out) { for (;;) { Out.resize(Size + BufSize); // Handle EINTR which is sent when a debugger attaches on some platforms. 
- if (!llvm::sys::RetryAfterSignal(nullptr, ::fgets, &Out[Size], BufSize, In)) + if (!retryAfterSignalUnlessShutdown( + nullptr, [&] { return std::fgets(&Out[Size], BufSize, In); })) return false; clearerr(In); // If the line contained null bytes, anything after it (including \n) will @@ -190,7 +197,7 @@ bool readLine(std::FILE *In, std::string &Out) { } // Returns None when: -// - ferror() or feof() are set. +// - ferror(), feof(), or shutdownRequested() are set. // - Content-Length is missing or empty (protocol error) llvm::Optional JSONTransport::readStandardMessage() { // A Language Server Protocol message starts with a set of HTTP headers, @@ -244,8 +251,9 @@ llvm::Optional JSONTransport::readStandardMessage() { std::string JSON(ContentLength, '\0'); for (size_t Pos = 0, Read; Pos < ContentLength; Pos += Read) { // Handle EINTR which is sent when a debugger attaches on some platforms. - Read = llvm::sys::RetryAfterSignal(0u, ::fread, &JSON[Pos], 1, - ContentLength - Pos, In); + Read = retryAfterSignalUnlessShutdown(0, [&]{ + return std::fread(&JSON[Pos], 1, ContentLength - Pos, In); + }); if (Read == 0) { elog("Input was aborted. Read only {0} bytes of expected {1}.", Pos, ContentLength); @@ -263,7 +271,7 @@ llvm::Optional JSONTransport::readStandardMessage() { // - messages are delimited by '---' on a line by itself // - lines starting with # are ignored. // This is a testing path, so favor simplicity over performance here. -// When returning None, feof() or ferror() will be set. +// When returning None, feof(), ferror(), or shutdownRequested() will be set. 
llvm::Optional JSONTransport::readDelimitedMessage() { std::string JSON; std::string Line; @@ -280,6 +288,8 @@ llvm::Optional JSONTransport::readDelimitedMessage() { JSON += Line; } + if (shutdownRequested()) + return llvm::None; if (ferror(In)) { elog("Input error while reading message!"); return llvm::None; diff --git a/clang-tools-extra/clangd/Shutdown.cpp b/clang-tools-extra/clangd/Shutdown.cpp new file mode 100644 index 0000000000000..dfea46d8dfeb8 --- /dev/null +++ b/clang-tools-extra/clangd/Shutdown.cpp @@ -0,0 +1,39 @@ +//===--- Shutdown.cpp - Unclean exit scenarios ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Shutdown.h" + +#include +#include + +namespace clang { +namespace clangd { + +void abortAfterTimeout(std::chrono::seconds Timeout) { + // This is more portable than sys::WatchDog, and yields a stack trace. + std::thread([Timeout] { + std::this_thread::sleep_for(Timeout); + std::abort(); + }).detach(); +} + +static std::atomic ShutdownRequested = {false}; + +void requestShutdown() { + if (ShutdownRequested.exchange(true)) + // This is the second shutdown request. Exit hard. + std::abort(); +} + +bool shutdownRequested() { + return ShutdownRequested; +} + +} // namespace clangd +} // namespace clang + diff --git a/clang-tools-extra/clangd/Shutdown.h b/clang-tools-extra/clangd/Shutdown.h new file mode 100644 index 0000000000000..3097f6a3e63c7 --- /dev/null +++ b/clang-tools-extra/clangd/Shutdown.h @@ -0,0 +1,84 @@ +//===--- Shutdown.h - Unclean exit scenarios --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// LSP specifies a protocol for shutting down: a `shutdown` request followed +// by an `exit` notification. If this protocol is followed, clangd should +// finish outstanding work and exit with code 0. +// +// The way this works in the happy case: +// - when ClangdLSPServer gets `shutdown`, it sets a flag +// - when ClangdLSPServer gets `exit`, it returns false to indicate end-of-LSP +// - Transport::loop() returns with no error +// - ClangdServer::run() checks the shutdown flag and returns with no error. +// - we `return 0` from main() +// - destructor of ClangdServer and other main()-locals runs. +// This blocks until outstanding requests complete (results are ignored) +// - global destructors run, such as fallback deletion of temporary files +// +// There are a number of things that can go wrong. Some are handled here, and +// some elsewhere. +// - `exit` notification with no `shutdown`: +// ClangdServer::run() sees this and returns false, main() returns nonzero. +// - stdin/stdout are closed +// The Transport detects this while doing IO and returns an error from loop() +// ClangdServer::run() logs a message and then returns false, etc +// - a request thread gets stuck, so the ClangdServer destructor hangs. +// Before returning from main(), we start a watchdog thread to abort() the +// process if it takes too long to exit. See abortAfterTimeout(). +// - clangd crashes (e.g. segfault or assertion) +// A fatal signal is sent (SEGV, ABRT, etc) +// The installed signal handler prints a stack trace and exits. +// - parent process goes away or tells us to shut down +// A "graceful shutdown" signal is sent (TERM, HUP, etc). +// The installed signal handler calls requestShutdown() which sets a flag. +// The Transport IO is interrupted, and Transport::loop() checks the flag and +// returns an error, etc. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SHUTDOWN_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SHUTDOWN_H + +#include +#include + +namespace clang { +namespace clangd { + +/// Causes this process to crash if still running after Timeout. +void abortAfterTimeout(std::chrono::seconds Timeout); + +/// Sets a flag to indicate that clangd was sent a shutdown signal, and the +/// transport loop should exit at the next opportunity. +/// If shutdown was already requested, aborts the process. +/// This function is threadsafe and signal-safe. +void requestShutdown(); +/// Checks whether requestShutdown() was called. +/// This function is threadsafe and signal-safe. +bool shutdownRequested(); + +/// Retry an operation if it gets interrupted by a signal. +/// This is like llvm::sys::RetryAfterSignal, except that if shutdown was +/// requested (which interrupts IO), we'll fail rather than retry. +template ()())> +Ret retryAfterSignalUnlessShutdown( + const typename std::enable_if::type &Fail, // Suppress deduction. + const Fun &F) { + Ret Res; + do { + if (shutdownRequested()) + return Fail; + errno = 0; + Res = F(); + } while (Res == Fail && errno == EINTR); + return Res; +} + +} // namespace clangd +} // namespace clang + +#endif diff --git a/clang-tools-extra/clangd/test/exit-eof.test b/clang-tools-extra/clangd/test/exit-eof.test new file mode 100644 index 0000000000000..06d2ea87ff480 --- /dev/null +++ b/clang-tools-extra/clangd/test/exit-eof.test @@ -0,0 +1,7 @@ +# RUN: not clangd -sync < %s 2> %t.err +# RUN: FileCheck %s < %t.err +# +# No LSP messages here, just let clangd see the end-of-file +# CHECK: Transport error: +# (Typically "Transport error: Input/output error" but platform-dependent). 
+ diff --git a/clang-tools-extra/clangd/test/exit-signal.test b/clang-tools-extra/clangd/test/exit-signal.test new file mode 100644 index 0000000000000..15029b0e5cc5f --- /dev/null +++ b/clang-tools-extra/clangd/test/exit-signal.test @@ -0,0 +1,32 @@ +# This is a fiddly signal test, we need POSIX and a real shell. +UNSUPPORTED: win32 +REQUIRES: shell + +# Our goal is: +# 1) spawn clangd +# 2) wait for it to start up (install signal handlers) +# 3) send SIGTERM +# 4) wait for clangd to shut down (nonzero exit for a signal) +# 5) verify the shutdown was clean + +RUN: rm -f %t.err + # To keep clangd running, we need to hold its input stream open. + # We redirect its input to a subshell that waits for it to start up. +RUN: not clangd 2> %t.err < <( \ + # Loop waiting for clangd to start up, so signal handlers are installed. + # Reading the PID line ensures this, and lets us send a signal. +RUN: while true; do \ + # Relevant log line is I[timestamp] PID: +RUN: CLANGD_PID=$(grep -a -m 1 "PID:" %t.err | cut -d' ' -f 3); \ +RUN: [ ! -z "$CLANGD_PID" ] && break; \ +RUN: done; \ +RUN: kill $CLANGD_PID; \ + # Now wait for clangd to stop reading (before closing its input!) +RUN: while not grep "LSP finished" %t.err; do :; done; \ +RUN: ) + +# Check that clangd caught the signal and shut down cleanly. 
+RUN: FileCheck %s < %t.err +CHECK: Transport error: Got signal +CHECK: LSP finished + diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index 608a2da681342..b8385a0c9e5d5 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -11,6 +11,7 @@ #include "Features.inc" #include "Path.h" #include "Protocol.h" +#include "Shutdown.h" #include "Trace.h" #include "Transport.h" #include "index/Background.h" @@ -35,6 +36,10 @@ #include #include +#ifndef _WIN32 +#include +#endif + namespace clang { namespace clangd { namespace { @@ -445,6 +450,7 @@ int main(int argc, char *argv[]) { llvm::InitializeAllTargetInfos(); llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); + llvm::sys::SetInterruptFunction(&requestShutdown); llvm::cl::SetVersionPrinter([](llvm::raw_ostream &OS) { OS << clang::getClangToolFullVersion("clangd") << "\n"; }); @@ -541,6 +547,10 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var LoggingSession LoggingSession(Logger); // Write some initial logs before we start doing any real work. log("{0}", clang::getClangToolFullVersion("clangd")); +// FIXME: abstract this better, and print PID on windows too. +#ifndef _WIN32 + log("PID: {0}", getpid()); +#endif { SmallString<128> CWD; if (auto Err = llvm::sys::fs::current_path(CWD)) @@ -694,12 +704,7 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var // However if a bug causes them to run forever, we want to ensure the process // eventually exits. As clangd isn't directly user-facing, an editor can // "leak" clangd processes. Crashing in this case contains the damage. - // - // This is more portable than sys::WatchDog, and yields a stack trace. 
- std::thread([] { - std::this_thread::sleep_for(std::chrono::minutes(5)); - std::abort(); - }).detach(); + abortAfterTimeout(std::chrono::minutes(5)); return ExitCode; } From 755dfaa1048ef216cfdce68b05185d8ab1b00ae7 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 27 Nov 2019 11:45:24 +0000 Subject: [PATCH 101/591] gn build: Merge 19ac0eaf07e --- llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn index 2302ca1e37333..dc513a41370e0 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn @@ -88,6 +88,7 @@ static_library("clangd") { "Selection.cpp", "SemanticHighlighting.cpp", "SemanticSelection.cpp", + "Shutdown.cpp", "SourceCode.cpp", "TUScheduler.cpp", "Threading.cpp", From bd2fb41c2dd537d1ab0dbce5795a1fef3de6cd52 Mon Sep 17 00:00:00 2001 From: AndreyChurbanov Date: Wed, 27 Nov 2019 15:26:51 +0300 Subject: [PATCH 102/591] [openmp] Fixed nonmonotonic schedule when #threads > #chunks in a loop. 
Differential Revision: https://reviews.llvm.org/D70713 --- openmp/runtime/src/kmp_dispatch.cpp | 15 +++---- .../for/omp_nonmonotonic_dynamic1.c | 40 +++++++++++++++++++ 2 files changed, 48 insertions(+), 7 deletions(-) create mode 100644 openmp/runtime/test/worksharing/for/omp_nonmonotonic_dynamic1.c diff --git a/openmp/runtime/src/kmp_dispatch.cpp b/openmp/runtime/src/kmp_dispatch.cpp index 161a2c6963578..aee1a649741cb 100644 --- a/openmp/runtime/src/kmp_dispatch.cpp +++ b/openmp/runtime/src/kmp_dispatch.cpp @@ -379,14 +379,15 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid, } break; } else { - KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to " - "kmp_sch_static_balanced\n", - gtid)); - schedule = kmp_sch_static_balanced; - /* too few iterations: fall-through to kmp_sch_static_balanced */ + /* too few chunks: switching to kmp_sch_dynamic_chunked */ + schedule = kmp_sch_dynamic_chunked; + KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d switching to " + "kmp_sch_dynamic_chunked\n", + gtid)); + if (pr->u.p.parm1 <= 0) + pr->u.p.parm1 = KMP_DEFAULT_CHUNK; + break; } // if - /* FALL-THROUGH to static balanced */ - KMP_FALLTHROUGH(); } // case #endif case kmp_sch_static_balanced: { diff --git a/openmp/runtime/test/worksharing/for/omp_nonmonotonic_dynamic1.c b/openmp/runtime/test/worksharing/for/omp_nonmonotonic_dynamic1.c new file mode 100644 index 0000000000000..0691353fe59e5 --- /dev/null +++ b/openmp/runtime/test/worksharing/for/omp_nonmonotonic_dynamic1.c @@ -0,0 +1,40 @@ +// RUN: %libomp-compile +// RUN: env OMP_SCHEDULE=nonmonotonic:dynamic,10 %libomp-run + +// The test checks iterations distribution for OMP 5.0 nonmonotonic OMP_SCHEDULE +// case #threads > #chunks (fallback to monotonic dynamic) + +#include +#include + +#define ITERS 100 +#define CHUNK 10 +int err = 0; + +int main(int argc, char **argv) { + int i, ch, it[ITERS]; + omp_set_num_threads(16); // #threads is bigger than #chunks +#pragma omp parallel for 
schedule(runtime) + for (i = 0; i < ITERS; ++i) { + it[i] = omp_get_thread_num(); + } + // check that each chunk executed by single thread + for (ch = 0; ch < ITERS/CHUNK; ++ch) { + int iter = ch * CHUNK; + int nt = it[iter]; // thread number + for (i = 1; i < CHUNK; ++i) { +#if _DEBUG + printf("iter %d: (%d %d)\n", iter + i, nt, it[iter + i]); +#endif + if (nt != it[iter + i]) { + err++; + } + } + } + if (err > 0) { + printf("Failed, err = %d\n", err); + return 1; + } + printf("Passed\n"); + return 0; +} From 9872ea4ed1de4c49300430e4f1f4dfc110a79ab9 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Wed, 27 Nov 2019 13:04:38 +0300 Subject: [PATCH 103/591] [clang][CodeGen] Implicit Conversion Sanitizer: handle increment/decrement (PR44054) Summary: Implicit Conversion Sanitizer is *almost* feature complete. There aren't *that* much unsanitized things left, two major ones are increment/decrement (this patch) and bit fields. As it was discussed in [[ https://bugs.llvm.org/show_bug.cgi?id=39519 | PR39519 ]], unlike `CompoundAssignOperator` (which is promoted internally), or `BinaryOperator` (for which we always have promotion/demotion in AST) or parts of `UnaryOperator` (we have promotion/demotion but only for certain operations), for inc/dec, clang omits promotion/demotion altogether, under as-if rule. This is technically correct: https://rise4fun.com/Alive/zPgD As it can be seen in `InstCombineCasts.cpp` `canEvaluateTruncated()`, `add`/`sub`/`mul`/`and`/`or`/`xor` operators can all arbitrarily be extended or truncated: https://github.com/llvm/llvm-project/blob/901cd3b3f62d0c700e5d2c3f97eff97d634bec5e/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp#L1320-L1334 But that has serious implications: 1. Since we no longer model implicit casts, do we pessimise their AST representation and everything that uses it? 2. 
There is no demotion, so lossy demotion sanitizer does not trigger :] Now, i'm not going to argue about the first problem here, but the second one **needs** to be addressed. As it was stated in the report, this is done intentionally, so changing this in all modes would be considered a penalization/regression. Which means, the sanitization-less codegen must not be altered. It was also suggested to not change the sanitized codegen to the one with demotion, but i quite strongly believe that will not be the wise choice here: 1. One will need to re-engineer the check that the inc/dec was lossy in terms of `@llvm.{u,s}{add,sub}.with.overflow` builtins 2. We will still need to compute the result we would lossily demote. (i.e. the result of wide `add`ition/`sub`traction) 3. I suspect it would need to be done right here, in sanitization. Which kinda defeats the point of using `@llvm.{u,s}{add,sub}.with.overflow` builtins: we'd have two `add`s with basically the same arguments, one of which is used for check+error-less codepath and other one for the error reporting. That seems worse than a single wide op+check. 4. OR, we would need to do that in the compiler-rt handler. Which means we'll need a whole new handler. But then what about the `CompoundAssignOperator`, it would also be applicable for it. So this also doesn't really seem like the right path to me. 5. At least X86 (but likely others) pessimizes all sub-`i32` operations (due to partial register stalls), so even if we avoid promotion+demotion, the computations will //likely// be performed in `i32` anyways. So i'm not really seeing much benefit of not doing the straight-forward thing. While looking into this, i have noticed a few more LLVM middle-end missed canonicalizations, and filed [[ https://bugs.llvm.org/show_bug.cgi?id=44100 | PR44100 ]], [[ https://bugs.llvm.org/show_bug.cgi?id=44102 | PR44102 ]]. 
Those are not specific to inc/dec, we also have them for `CompoundAssignOperator`, and it can happen for normal arithmetics, too. But if we take some other path in the patch, it will not be applicable here, and we will have most likely played ourselves. TLDR: front-end should emit canonical, easy-to-optimize yet un-optimized code. It is middle-end's job to make it optimal. I'm really hoping reviewers agree with my personal assessment of the path this patch should take.. Fixes [[ https://bugs.llvm.org/show_bug.cgi?id=44054 | PR44054 ]]. Reviewers: rjmccall, erichkeane, rsmith, vsk Reviewed By: erichkeane Subscribers: mehdi_amini, dexonsmith, cfe-commits, #sanitizers, llvm-commits, aaron.ballman, t.p.northover, efriedma, regehr Tags: #llvm, #clang, #sanitizers Differential Revision: https://reviews.llvm.org/D70539 --- clang/docs/ReleaseNotes.rst | 4 + clang/lib/CodeGen/CGExprScalar.cpp | 36 +- ...catch-implicit-conversions-incdec-basics.c | 139 ++++++++ ...er-arithmetic-value-change-incdec-basics.c | 139 ++++++++ ...plicit-integer-conversions-incdec-basics.c | 139 ++++++++ ...licit-integer-sign-changes-incdec-basics.c | 139 ++++++++ ...tch-implicit-integer-sign-changes-incdec.c | 307 ++++++++++++++++++ ...plicit-integer-truncations-incdec-basics.c | 139 ++++++++ ...signed-integer-truncations-incdec-basics.c | 139 ++++++++ ...plicit-signed-integer-truncations-incdec.c | 303 +++++++++++++++++ ...signed-integer-truncations-incdec-basics.c | 101 ++++++ .../integer-conversion-incdec.c | 122 +++++++ .../integer-sign-change-incdec.c | 120 +++++++ .../signed-integer-truncation-incdec.c | 122 +++++++ 14 files changed, 1946 insertions(+), 3 deletions(-) create mode 100644 clang/test/CodeGen/catch-implicit-conversions-incdec-basics.c create mode 100644 clang/test/CodeGen/catch-implicit-integer-arithmetic-value-change-incdec-basics.c create mode 100644 clang/test/CodeGen/catch-implicit-integer-conversions-incdec-basics.c create mode 100644 
clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec-basics.c create mode 100644 clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec.c create mode 100644 clang/test/CodeGen/catch-implicit-integer-truncations-incdec-basics.c create mode 100644 clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec-basics.c create mode 100644 clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec.c create mode 100644 clang/test/CodeGen/catch-implicit-unsigned-integer-truncations-incdec-basics.c create mode 100644 compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-conversion-incdec.c create mode 100644 compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-sign-change-incdec.c create mode 100644 compiler-rt/test/ubsan/TestCases/ImplicitConversion/signed-integer-truncation-incdec.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 4ac300deb589a..37a8f30e0bc9c 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -78,6 +78,10 @@ Non-comprehensive list of changes in this release been extended to detect these cases, so that code relying on them can be detected and fixed. +* The Implicit Conversion Sanitizer (``-fsanitize=implicit-conversion``) has + learned to sanitize pre/post increment/decrement of types with bit width + smaller than ``int``. + - For X86 target, -march=skylake-avx512, -march=icelake-client, -march=icelake-server, -march=cascadelake, -march=cooperlake will default to not using 512-bit zmm registers in vectorized code unless 512-bit intrinsics diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 8229766406434..953ced9168c5b 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -2419,9 +2419,39 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, // Most common case by far: integer increment. 
} else if (type->isIntegerType()) { - // Note that signed integer inc/dec with width less than int can't - // overflow because of promotion rules; we're just eliding a few steps here. - if (E->canOverflow() && type->isSignedIntegerOrEnumerationType()) { + assert((!type->isPromotableIntegerType() || + (type->isSignedIntegerOrEnumerationType() || + CGF.getContext() + .getPromotedIntegerType(type) + ->isSignedIntegerOrEnumerationType())) && + "The following check expects that if we do promotion, at least one " + "of the types (either base or promoted) will be signed."); + if (CGF.SanOpts.hasOneOf( + SanitizerKind::ImplicitIntegerArithmeticValueChange) && + type->isPromotableIntegerType()) { + // While `x += 1` (for `x` with width less than int) is modeled as + // promotion+arithmetics+demotion, and we can catch lossy demotion with + // ease; inc/dec with width less than int can't overflow because of + // promotion rules, so we omit promotion+demotion, which means that we can + // not catch lossy "demotion". Because we still want to catch these cases + // when the sanitizer is enabled, we perform the promotion, then perform + // the increment/decrement in the wider type, and finally + // perform the demotion. This will catch lossy demotions. + + QualType promotedType = CGF.getContext().getPromotedIntegerType(type); + assert(promotedType != type && "Shouldn't promote to the same type."); + value = EmitScalarConversion(value, type, promotedType, E->getExprLoc()); + Value *amt = llvm::ConstantInt::get(value->getType(), amount, true); + value = Builder.CreateAdd(value, amt, isInc ? "inc" : "dec"); + // Do pass non-default ScalarConversionOpts so that sanitizer check is + // emitted. + value = EmitScalarConversion(value, promotedType, type, E->getExprLoc(), + ScalarConversionOpts(CGF.SanOpts)); + + // Note that signed integer inc/dec with width less than int can't + // overflow because of promotion rules; we're just eliding a few steps + // here. 
+ } else if (E->canOverflow() && type->isSignedIntegerOrEnumerationType()) { value = EmitIncDecConsiderOverflowBehavior(E, value, isInc); } else if (E->canOverflow() && type->isUnsignedIntegerType() && CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) { diff --git a/clang/test/CodeGen/catch-implicit-conversions-incdec-basics.c b/clang/test/CodeGen/catch-implicit-conversions-incdec-basics.c new file mode 100644 index 0000000000000..e97a72cb0a339 --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-conversions-incdec-basics.c @@ -0,0 +1,139 @@ +// RUN: %clang_cc1 -fsanitize=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change -fsanitize-recover=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK + +// CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } +// CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } +// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } +// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, {{.*}}* @[[INT]], {{.*}}* 
@[[SHORT]], i8 2 } +// CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } +// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } +// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } + +// CHECK-LABEL: @t0( +unsigned short t0(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*) +#line 100 + x++; + return x; +} +// CHECK-LABEL: @t1( +unsigned short t1(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*) +#line 200 + x--; + return x; +} +// CHECK-LABEL: @t2( +unsigned short t2(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*) +#line 300 + ++x; + return x; +} +// CHECK-LABEL: @t3( +unsigned short t3(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*) +#line 400 + --x; + return x; +} + +// CHECK-LABEL: @t4( 
+signed short t4(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*) +#line 500 + x++; + return x; +} +// CHECK-LABEL: @t5( +signed short t5(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*) +#line 600 + x--; + return x; +} +// CHECK-LABEL: @t6( +signed short t6(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*) +#line 700 + ++x; + return x; +} +// CHECK-LABEL: @t7( +signed short t7(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*) +#line 800 + --x; + return x; +} + +// CHECK-LABEL: @t8( +unsigned char t8(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_900]] to i8*) +#line 900 + x++; + return x; +} +// CHECK-LABEL: @t9( +unsigned char t9(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1000]] to i8*) +#line 1000 + x--; + return x; +} +// CHECK-LABEL: @t10( +unsigned char t10(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1100]] to i8*) +#line 1100 + ++x; + return x; +} +// CHECK-LABEL: @t11( +unsigned char t11(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1200]] to i8*) +#line 1200 + --x; + return x; +} + +// CHECK-LABEL: @t12( +signed char t12(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1300]] to i8*) +#line 1300 + x++; + 
return x; +} +// CHECK-LABEL: @t13( +signed char t13(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1400]] to i8*) +#line 1400 + x--; + return x; +} +// CHECK-LABEL: @t14( +signed char t14(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1500]] to i8*) +#line 1500 + ++x; + return x; +} +// CHECK-LABEL: @t15( +signed char t15(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1600]] to i8*) +#line 1600 + --x; + return x; +} diff --git a/clang/test/CodeGen/catch-implicit-integer-arithmetic-value-change-incdec-basics.c b/clang/test/CodeGen/catch-implicit-integer-arithmetic-value-change-incdec-basics.c new file mode 100644 index 0000000000000..5e0aa1108dfc9 --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-integer-arithmetic-value-change-incdec-basics.c @@ -0,0 +1,139 @@ +// RUN: %clang_cc1 -fsanitize=implicit-signed-integer-truncation,implicit-integer-sign-change -fsanitize-recover=implicit-signed-integer-truncation,implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK + +// CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } +// CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } +// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[SHORT:.*]] = {{.*}} 
c"'short'\00" } +// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } +// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } +// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } + +// CHECK-LABEL: @t0( +unsigned short t0(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*) +#line 100 + x++; + return x; +} +// CHECK-LABEL: @t1( +unsigned short t1(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*) +#line 200 + x--; + return x; +} +// CHECK-LABEL: @t2( +unsigned short t2(unsigned short x) { 
+ // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*) +#line 300 + ++x; + return x; +} +// CHECK-LABEL: @t3( +unsigned short t3(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*) +#line 400 + --x; + return x; +} + +// CHECK-LABEL: @t4( +signed short t4(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*) +#line 500 + x++; + return x; +} +// CHECK-LABEL: @t5( +signed short t5(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*) +#line 600 + x--; + return x; +} +// CHECK-LABEL: @t6( +signed short t6(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*) +#line 700 + ++x; + return x; +} +// CHECK-LABEL: @t7( +signed short t7(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*) +#line 800 + --x; + return x; +} + +// CHECK-LABEL: @t8( +unsigned char t8(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_900]] to i8*) +#line 900 + x++; + return x; +} +// CHECK-LABEL: @t9( +unsigned char t9(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1000]] to i8*) +#line 1000 + x--; + return x; +} +// CHECK-LABEL: @t10( +unsigned char t10(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1100]] to i8*) +#line 1100 + ++x; + return x; +} +// CHECK-LABEL: @t11( 
+unsigned char t11(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1200]] to i8*) +#line 1200 + --x; + return x; +} + +// CHECK-LABEL: @t12( +signed char t12(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1300]] to i8*) +#line 1300 + x++; + return x; +} +// CHECK-LABEL: @t13( +signed char t13(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1400]] to i8*) +#line 1400 + x--; + return x; +} +// CHECK-LABEL: @t14( +signed char t14(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1500]] to i8*) +#line 1500 + ++x; + return x; +} +// CHECK-LABEL: @t15( +signed char t15(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1600]] to i8*) +#line 1600 + --x; + return x; +} diff --git a/clang/test/CodeGen/catch-implicit-integer-conversions-incdec-basics.c b/clang/test/CodeGen/catch-implicit-integer-conversions-incdec-basics.c new file mode 100644 index 0000000000000..e97a72cb0a339 --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-integer-conversions-incdec-basics.c @@ -0,0 +1,139 @@ +// RUN: %clang_cc1 -fsanitize=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change -fsanitize-recover=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK + +// CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } +// CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } +// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 
100, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } +// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } +// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } +// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } + +// CHECK-LABEL: @t0( +unsigned short t0(unsigned short x) { + // 
CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*) +#line 100 + x++; + return x; +} +// CHECK-LABEL: @t1( +unsigned short t1(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*) +#line 200 + x--; + return x; +} +// CHECK-LABEL: @t2( +unsigned short t2(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*) +#line 300 + ++x; + return x; +} +// CHECK-LABEL: @t3( +unsigned short t3(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*) +#line 400 + --x; + return x; +} + +// CHECK-LABEL: @t4( +signed short t4(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*) +#line 500 + x++; + return x; +} +// CHECK-LABEL: @t5( +signed short t5(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*) +#line 600 + x--; + return x; +} +// CHECK-LABEL: @t6( +signed short t6(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*) +#line 700 + ++x; + return x; +} +// CHECK-LABEL: @t7( +signed short t7(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*) +#line 800 + --x; + return x; +} + +// CHECK-LABEL: @t8( +unsigned char t8(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_900]] to i8*) +#line 900 + x++; + return x; +} +// CHECK-LABEL: @t9( +unsigned char 
t9(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1000]] to i8*) +#line 1000 + x--; + return x; +} +// CHECK-LABEL: @t10( +unsigned char t10(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1100]] to i8*) +#line 1100 + ++x; + return x; +} +// CHECK-LABEL: @t11( +unsigned char t11(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1200]] to i8*) +#line 1200 + --x; + return x; +} + +// CHECK-LABEL: @t12( +signed char t12(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1300]] to i8*) +#line 1300 + x++; + return x; +} +// CHECK-LABEL: @t13( +signed char t13(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1400]] to i8*) +#line 1400 + x--; + return x; +} +// CHECK-LABEL: @t14( +signed char t14(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1500]] to i8*) +#line 1500 + ++x; + return x; +} +// CHECK-LABEL: @t15( +signed char t15(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1600]] to i8*) +#line 1600 + --x; + return x; +} diff --git a/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec-basics.c b/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec-basics.c new file mode 100644 index 0000000000000..93495b331b9f8 --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec-basics.c @@ -0,0 +1,139 @@ +// RUN: %clang_cc1 -fsanitize=implicit-integer-sign-change -fsanitize-recover=implicit-integer-sign-change -emit-llvm %s -o - 
-triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK + +// CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } +// CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } +// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } +// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } +// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } +// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } +// CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } +// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } +// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } +// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } +// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } +// CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } +// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 3 } +// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 3 } +// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 3 } +// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 3 } +// CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } +// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 3 } +// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], 
i8 3 } +// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 3 } +// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 3 } + +// CHECK-LABEL: @t0( +unsigned short t0(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*) +#line 100 + x++; + return x; +} +// CHECK-LABEL: @t1( +unsigned short t1(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*) +#line 200 + x--; + return x; +} +// CHECK-LABEL: @t2( +unsigned short t2(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*) +#line 300 + ++x; + return x; +} +// CHECK-LABEL: @t3( +unsigned short t3(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*) +#line 400 + --x; + return x; +} + +// CHECK-LABEL: @t4( +signed short t4(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*) +#line 500 + x++; + return x; +} +// CHECK-LABEL: @t5( +signed short t5(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*) +#line 600 + x--; + return x; +} +// CHECK-LABEL: @t6( +signed short t6(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*) +#line 700 + ++x; + return x; +} +// CHECK-LABEL: @t7( +signed short t7(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to 
i8*) +#line 800 + --x; + return x; +} + +// CHECK-LABEL: @t8( +unsigned char t8(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_900]] to i8*) +#line 900 + x++; + return x; +} +// CHECK-LABEL: @t9( +unsigned char t9(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1000]] to i8*) +#line 1000 + x--; + return x; +} +// CHECK-LABEL: @t10( +unsigned char t10(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1100]] to i8*) +#line 1100 + ++x; + return x; +} +// CHECK-LABEL: @t11( +unsigned char t11(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1200]] to i8*) +#line 1200 + --x; + return x; +} + +// CHECK-LABEL: @t12( +signed char t12(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1300]] to i8*) +#line 1300 + x++; + return x; +} +// CHECK-LABEL: @t13( +signed char t13(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1400]] to i8*) +#line 1400 + x--; + return x; +} +// CHECK-LABEL: @t14( +signed char t14(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1500]] to i8*) +#line 1500 + ++x; + return x; +} +// CHECK-LABEL: @t15( +signed char t15(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1600]] to i8*) +#line 1600 + --x; + return x; +} diff --git a/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec.c b/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec.c new 
file mode 100644 index 0000000000000..3fe23ecbd8d9b --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec.c @@ -0,0 +1,307 @@ +// RUN: %clang_cc1 -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NOSANITIZE + +// RUN: %clang_cc1 -fsanitize=implicit-integer-sign-change -fno-sanitize-recover=implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-NORECOVER,CHECK-SANITIZE-UNREACHABLE +// RUN: %clang_cc1 -fsanitize=implicit-integer-sign-change -fsanitize-recover=implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-RECOVER +// RUN: %clang_cc1 -fsanitize=implicit-integer-sign-change -fsanitize-trap=implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-TRAP,CHECK-SANITIZE-UNREACHABLE + +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } +// 
CHECK-SANITIZE-ANYRECOVER-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } + +unsigned short t0(unsigned short x) { +// CHECK-NOSANITIZE-LABEL: @t0( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] +// +// CHECK-SANITIZE-LABEL: @t0( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], false, !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: 
[[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_100]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_100]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] +#line 100 + return x++; +} +unsigned short t1(unsigned short x) { +// CHECK-NOSANITIZE-LABEL: @t1( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] +// +// CHECK-SANITIZE-LABEL: @t1( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 +// 
CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], false, !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_200]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_200]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] +#line 200 + return x--; +} + +unsigned short t2(unsigned short x) { +// CHECK-NOSANITIZE-LABEL: @t2( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add 
i16 [[X_RELOADED]], 1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] +// +// CHECK-SANITIZE-LABEL: @t2( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], false, !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_300]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_300]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 
[[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] +#line 300 + return ++x; +} + +unsigned short t3(unsigned short x) { +// CHECK-NOSANITIZE-LABEL: @t3( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] +// +// CHECK-SANITIZE-LABEL: @t3( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], false, !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, 
i8 }* @[[LINE_400]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_400]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] +#line 400 + return --x; +} + +signed short t4(signed short x) { +// CHECK-NOSANITIZE-LABEL: @t4( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] +// +// CHECK-SANITIZE-LABEL: @t4( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[DST_NEGATIVITYCHECK:%.*]] = icmp slt i16 [[X_PROMOTED_DEMOTED]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], [[DST_NEGATIVITYCHECK]], !nosanitize +// 
CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_500]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_500]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] +#line 500 + return x++; +} +signed short t5(signed short x) { +// CHECK-NOSANITIZE-LABEL: @t5( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] +// +// CHECK-SANITIZE-LABEL: @t5( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// 
CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[DST_NEGATIVITYCHECK:%.*]] = icmp slt i16 [[X_PROMOTED_DEMOTED]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], [[DST_NEGATIVITYCHECK]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_600]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_600]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] +#line 600 + return x--; +} + +signed short t6(signed short x) { +// CHECK-NOSANITIZE-LABEL: @t6( +// 
CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] +// +// CHECK-SANITIZE-LABEL: @t6( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[DST_NEGATIVITYCHECK:%.*]] = icmp slt i16 [[X_PROMOTED_DEMOTED]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], [[DST_NEGATIVITYCHECK]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_700]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// 
CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_700]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] +#line 700 + return ++x; +} + +signed short t7(signed short x) { +// CHECK-NOSANITIZE-LABEL: @t7( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] +// +// CHECK-SANITIZE-LABEL: @t7( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[DST_NEGATIVITYCHECK:%.*]] = icmp slt i16 [[X_PROMOTED_DEMOTED]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], [[DST_NEGATIVITYCHECK]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label 
%[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_800]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_800]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] +#line 800 + return --x; +} diff --git a/clang/test/CodeGen/catch-implicit-integer-truncations-incdec-basics.c b/clang/test/CodeGen/catch-implicit-integer-truncations-incdec-basics.c new file mode 100644 index 0000000000000..6ac2be6d9fd0c --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-integer-truncations-incdec-basics.c @@ -0,0 +1,139 @@ +// RUN: %clang_cc1 -fsanitize=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation -fsanitize-recover=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK + +// CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } +// CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } +// 
CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } +// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } +// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } +// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } + +// CHECK-LABEL: @t0( 
+unsigned short t0(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*) +#line 100 + x++; + return x; +} +// CHECK-LABEL: @t1( +unsigned short t1(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*) +#line 200 + x--; + return x; +} +// CHECK-LABEL: @t2( +unsigned short t2(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*) +#line 300 + ++x; + return x; +} +// CHECK-LABEL: @t3( +unsigned short t3(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*) +#line 400 + --x; + return x; +} + +// CHECK-LABEL: @t4( +signed short t4(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*) +#line 500 + x++; + return x; +} +// CHECK-LABEL: @t5( +signed short t5(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*) +#line 600 + x--; + return x; +} +// CHECK-LABEL: @t6( +signed short t6(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*) +#line 700 + ++x; + return x; +} +// CHECK-LABEL: @t7( +signed short t7(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*) +#line 800 + --x; + return x; +} + +// CHECK-LABEL: @t8( +unsigned char t8(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_900]] to i8*) +#line 900 + x++; + 
return x; +} +// CHECK-LABEL: @t9( +unsigned char t9(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1000]] to i8*) +#line 1000 + x--; + return x; +} +// CHECK-LABEL: @t10( +unsigned char t10(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1100]] to i8*) +#line 1100 + ++x; + return x; +} +// CHECK-LABEL: @t11( +unsigned char t11(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1200]] to i8*) +#line 1200 + --x; + return x; +} + +// CHECK-LABEL: @t12( +signed char t12(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1300]] to i8*) +#line 1300 + x++; + return x; +} +// CHECK-LABEL: @t13( +signed char t13(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1400]] to i8*) +#line 1400 + x--; + return x; +} +// CHECK-LABEL: @t14( +signed char t14(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1500]] to i8*) +#line 1500 + ++x; + return x; +} +// CHECK-LABEL: @t15( +signed char t15(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1600]] to i8*) +#line 1600 + --x; + return x; +} diff --git a/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec-basics.c b/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec-basics.c new file mode 100644 index 0000000000000..b7e438c7229ce --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec-basics.c @@ -0,0 +1,139 @@ +// RUN: %clang_cc1 
-fsanitize=implicit-signed-integer-truncation -fsanitize-recover=implicit-signed-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK + +// CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } +// CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } +// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } +// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } +// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } +// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, {{.*}}* @[[INT]], {{.*}}* 
@[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } + +// CHECK-LABEL: @t0( +unsigned short t0(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*) +#line 100 + x++; + return x; +} +// CHECK-LABEL: @t1( +unsigned short t1(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*) +#line 200 + x--; + return x; +} +// CHECK-LABEL: @t2( +unsigned short t2(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*) +#line 300 + ++x; + return x; +} +// CHECK-LABEL: @t3( +unsigned short t3(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*) +#line 400 + --x; + return x; +} + +// CHECK-LABEL: @t4( +signed short t4(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*) +#line 500 + x++; + return x; +} +// CHECK-LABEL: @t5( +signed short t5(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*) +#line 600 + x--; + return x; +} +// CHECK-LABEL: @t6( +signed short t6(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*) +#line 700 + ++x; + return x; +} +// CHECK-LABEL: @t7( +signed short t7(signed short x) 
{ + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*) +#line 800 + --x; + return x; +} + +// CHECK-LABEL: @t8( +unsigned char t8(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_900]] to i8*) +#line 900 + x++; + return x; +} +// CHECK-LABEL: @t9( +unsigned char t9(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1000]] to i8*) +#line 1000 + x--; + return x; +} +// CHECK-LABEL: @t10( +unsigned char t10(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1100]] to i8*) +#line 1100 + ++x; + return x; +} +// CHECK-LABEL: @t11( +unsigned char t11(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1200]] to i8*) +#line 1200 + --x; + return x; +} + +// CHECK-LABEL: @t12( +signed char t12(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1300]] to i8*) +#line 1300 + x++; + return x; +} +// CHECK-LABEL: @t13( +signed char t13(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1400]] to i8*) +#line 1400 + x--; + return x; +} +// CHECK-LABEL: @t14( +signed char t14(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1500]] to i8*) +#line 1500 + ++x; + return x; +} +// CHECK-LABEL: @t15( +signed char t15(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1600]] to i8*) +#line 1600 + --x; + return x; +} diff --git 
a/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec.c b/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec.c new file mode 100644 index 0000000000000..5e602d60faffc --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec.c @@ -0,0 +1,303 @@ +// RUN: %clang_cc1 -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NOSANITIZE + +// RUN: %clang_cc1 -fsanitize=implicit-signed-integer-truncation -fno-sanitize-recover=implicit-signed-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-NORECOVER,CHECK-SANITIZE-UNREACHABLE +// RUN: %clang_cc1 -fsanitize=implicit-signed-integer-truncation -fsanitize-recover=implicit-signed-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-RECOVER +// RUN: %clang_cc1 -fsanitize=implicit-signed-integer-truncation -fsanitize-trap=implicit-signed-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-TRAP,CHECK-SANITIZE-UNREACHABLE + +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 10 }, {{.*}}* @[[INT]], {{.*}}* 
@[[UNSIGNED_SHORT]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } + +unsigned short t0(unsigned short x) { +// CHECK-NOSANITIZE-LABEL: @t0( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] +// +// CHECK-SANITIZE-LABEL: @t0( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize +// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize +// 
CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_100]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_100]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] +#line 100 + return x++; +} +unsigned short t1(unsigned short x) { +// CHECK-NOSANITIZE-LABEL: @t1( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] +// +// CHECK-SANITIZE-LABEL: @t1( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// 
CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize +// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_200]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_200]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] +#line 200 + return x--; +} + +unsigned short t2(unsigned short x) { +// CHECK-NOSANITIZE-LABEL: @t2( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: 
store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] +// +// CHECK-SANITIZE-LABEL: @t2( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize +// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_300]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_300]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// 
CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] +#line 300 + return ++x; +} + +unsigned short t3(unsigned short x) { +// CHECK-NOSANITIZE-LABEL: @t3( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] +// +// CHECK-SANITIZE-LABEL: @t3( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize +// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] 
to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_400]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_400]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] +#line 400 + return --x; +} + +signed short t4(signed short x) { +// CHECK-NOSANITIZE-LABEL: @t4( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] +// +// CHECK-SANITIZE-LABEL: @t4( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = sext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize +// CHECK-SANITIZE-NEXT: 
[[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_500]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_500]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] +#line 500 + return x++; +} +signed short t5(signed short x) { +// CHECK-NOSANITIZE-LABEL: @t5( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] +// +// CHECK-SANITIZE-LABEL: @t5( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// 
CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = sext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize +// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_600]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_600]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] +#line 600 + return x--; +} + +signed short t6(signed short x) { +// CHECK-NOSANITIZE-LABEL: @t6( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: 
[[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] +// +// CHECK-SANITIZE-LABEL: @t6( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = sext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize +// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_700]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* 
@[[LINE_700]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] +#line 700 + return ++x; +} + +signed short t7(signed short x) { +// CHECK-NOSANITIZE-LABEL: @t7( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] +// +// CHECK-SANITIZE-LABEL: @t7( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = sext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize +// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// 
CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_800]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_800]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] +#line 800 + return --x; +} diff --git a/clang/test/CodeGen/catch-implicit-unsigned-integer-truncations-incdec-basics.c b/clang/test/CodeGen/catch-implicit-unsigned-integer-truncations-incdec-basics.c new file mode 100644 index 0000000000000..7ad12314f3df0 --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-unsigned-integer-truncations-incdec-basics.c @@ -0,0 +1,101 @@ +// RUN: %clang_cc1 -fsanitize=implicit-unsigned-integer-truncation -fsanitize-recover=implicit-unsigned-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK + +// CHECK-LABEL: @t0( +unsigned short t0(unsigned short x) { +#line 100 + x++; + return x; +} +// CHECK-LABEL: @t1( +unsigned short t1(unsigned short x) { +#line 200 + x--; + return x; +} +// CHECK-LABEL: @t2( +unsigned short t2(unsigned short x) { +#line 300 + ++x; + return x; +} +// CHECK-LABEL: @t3( +unsigned short t3(unsigned short x) { +#line 400 + --x; + return x; +} + +// CHECK-LABEL: @t4( +signed short t4(signed short x) { +#line 500 + x++; + 
return x; +} +// CHECK-LABEL: @t5( +signed short t5(signed short x) { +#line 600 + x--; + return x; +} +// CHECK-LABEL: @t6( +signed short t6(signed short x) { +#line 700 + ++x; + return x; +} +// CHECK-LABEL: @t7( +signed short t7(signed short x) { +#line 800 + --x; + return x; +} + +// CHECK-LABEL: @t8( +unsigned char t8(unsigned char x) { +#line 900 + x++; + return x; +} +// CHECK-LABEL: @t9( +unsigned char t9(unsigned char x) { +#line 1000 + x--; + return x; +} +// CHECK-LABEL: @t10( +unsigned char t10(unsigned char x) { +#line 1100 + ++x; + return x; +} +// CHECK-LABEL: @t11( +unsigned char t11(unsigned char x) { +#line 1200 + --x; + return x; +} + +// CHECK-LABEL: @t12( +signed char t12(signed char x) { +#line 1300 + x++; + return x; +} +// CHECK-LABEL: @t13( +signed char t13(signed char x) { +#line 1400 + x--; + return x; +} +// CHECK-LABEL: @t14( +signed char t14(signed char x) { +#line 1500 + ++x; + return x; +} +// CHECK-LABEL: @t15( +signed char t15(signed char x) { +#line 1600 + --x; + return x; +} diff --git a/compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-conversion-incdec.c b/compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-conversion-incdec.c new file mode 100644 index 0000000000000..0e62c02d3affb --- /dev/null +++ b/compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-conversion-incdec.c @@ -0,0 +1,122 @@ +// RUN: %clang -x c -fsanitize=implicit-conversion -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-conversion -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-conversion -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-conversion -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" 
--check-prefixes=CHECK + +// RUN: %clang -x c++ -fsanitize=implicit-conversion -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-conversion -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-conversion -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-conversion -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK + +void test_unsigned() { + unsigned char x; + + x = 0; + x++; + x = 0; + ++x; + + x = 0; + x--; + // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'unsigned char' changed the value to 255 (8-bit, unsigned) + x = 0; + --x; + // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'unsigned char' changed the value to 255 (8-bit, unsigned) + + x = 1; + x++; + x = 1; + ++x; + + x = 1; + x--; + x = 1; + --x; + + x = 254; + x++; + x = 254; + ++x; + + x = 254; + x--; + x = 254; + --x; + + x = 255; + x++; + // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value 256 (32-bit, signed) to type 'unsigned char' changed the value to 0 (8-bit, unsigned) + x = 255; + ++x; + // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value 256 (32-bit, signed) to type 'unsigned char' changed the value to 0 (8-bit, unsigned) + + x = 255; + x--; + x = 255; + --x; +} + +void test_signed() { + signed char x; + + x = -128; + x++; + x = -128; + ++x; + + x = -128; + x--; + // CHECK: 
{{.*}}integer-conversion-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'signed char' changed the value to 127 (8-bit, signed) + x = -128; + --x; + // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'signed char' changed the value to 127 (8-bit, signed) + + x = -1; + x++; + x = -1; + ++x; + + x = -1; + x--; + x = -1; + --x; + + x = 0; + x++; + x = 0; + ++x; + + x = 0; + x--; + x = 0; + --x; + + x = 1; + x++; + x = 1; + ++x; + + x = 1; + x--; + x = 1; + --x; + + x = 127; + x++; + // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'signed char' changed the value to -128 (8-bit, signed) + x = 127; + ++x; + // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'signed char' changed the value to -128 (8-bit, signed) + + x = 127; + x--; + x = 127; + --x; +} + +int main() { + test_unsigned(); + test_signed(); + + return 0; +} diff --git a/compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-sign-change-incdec.c b/compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-sign-change-incdec.c new file mode 100644 index 0000000000000..4b56a105aa289 --- /dev/null +++ b/compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-sign-change-incdec.c @@ -0,0 +1,120 @@ +// RUN: %clang -x c -fsanitize=implicit-integer-sign-change -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-integer-sign-change -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-integer-sign-change -O2 %s -o %t && %run %t 2>&1 | FileCheck %s 
--implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-integer-sign-change -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK + +// RUN: %clang -x c++ -fsanitize=implicit-integer-sign-change -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-integer-sign-change -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-integer-sign-change -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-integer-sign-change -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK + +void test_unsigned() { + unsigned char x; + + x = 0; + x++; + x = 0; + ++x; + + x = 0; + x--; + // CHECK: {{.*}}integer-sign-change-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'unsigned char' changed the value to 255 (8-bit, unsigned) + x = 0; + --x; + // CHECK: {{.*}}integer-sign-change-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'unsigned char' changed the value to 255 (8-bit, unsigned) + + x = 1; + x++; + x = 1; + ++x; + + x = 1; + x--; + x = 1; + --x; + + x = 254; + x++; + x = 254; + ++x; + + x = 254; + x--; + x = 254; + --x; + + x = 255; + x++; + x = 255; + ++x; + + x = 255; + x--; + x = 255; + --x; +} + +void test_signed() { + signed char x; + + x = -128; + x++; + x = -128; + ++x; + + x = -128; + x--; + // CHECK: {{.*}}integer-sign-change-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'signed char' changed the value to 127 (8-bit, signed) + x = 
-128; + --x; + // CHECK: {{.*}}integer-sign-change-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'signed char' changed the value to 127 (8-bit, signed) + + x = -1; + x++; + x = -1; + ++x; + + x = -1; + x--; + x = -1; + --x; + + x = 0; + x++; + x = 0; + ++x; + + x = 0; + x--; + x = 0; + --x; + + x = 1; + x++; + x = 1; + ++x; + + x = 1; + x--; + x = 1; + --x; + + x = 127; + x++; + // CHECK: {{.*}}integer-sign-change-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'signed char' changed the value to -128 (8-bit, signed) + x = 127; + ++x; + // CHECK: {{.*}}integer-sign-change-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'signed char' changed the value to -128 (8-bit, signed) + + x = 127; + x--; + x = 127; + --x; +} + +int main() { + test_unsigned(); + test_signed(); + + return 0; +} diff --git a/compiler-rt/test/ubsan/TestCases/ImplicitConversion/signed-integer-truncation-incdec.c b/compiler-rt/test/ubsan/TestCases/ImplicitConversion/signed-integer-truncation-incdec.c new file mode 100644 index 0000000000000..4806efb24eb13 --- /dev/null +++ b/compiler-rt/test/ubsan/TestCases/ImplicitConversion/signed-integer-truncation-incdec.c @@ -0,0 +1,122 @@ +// RUN: %clang -x c -fsanitize=implicit-signed-integer-truncation -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-signed-integer-truncation -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-signed-integer-truncation -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-signed-integer-truncation -O3 %s -o %t && %run %t 2>&1 | FileCheck 
%s --implicit-check-not="implicit conversion" --check-prefixes=CHECK + +// RUN: %clang -x c++ -fsanitize=implicit-signed-integer-truncation -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-signed-integer-truncation -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-signed-integer-truncation -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-signed-integer-truncation -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK + +void test_unsigned() { + unsigned char x; + + x = 0; + x++; + x = 0; + ++x; + + x = 0; + x--; + // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'unsigned char' changed the value to 255 (8-bit, unsigned) + x = 0; + --x; + // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'unsigned char' changed the value to 255 (8-bit, unsigned) + + x = 1; + x++; + x = 1; + ++x; + + x = 1; + x--; + x = 1; + --x; + + x = 254; + x++; + x = 254; + ++x; + + x = 254; + x--; + x = 254; + --x; + + x = 255; + x++; + // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value 256 (32-bit, signed) to type 'unsigned char' changed the value to 0 (8-bit, unsigned) + x = 255; + ++x; + // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value 256 (32-bit, signed) to type 'unsigned char' changed the value to 0 (8-bit, unsigned) + + x = 255; + x--; + x = 255; + --x; +} + +void 
test_signed() { + signed char x; + + x = -128; + x++; + x = -128; + ++x; + + x = -128; + x--; + // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'signed char' changed the value to 127 (8-bit, signed) + x = -128; + --x; + // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'signed char' changed the value to 127 (8-bit, signed) + + x = -1; + x++; + x = -1; + ++x; + + x = -1; + x--; + x = -1; + --x; + + x = 0; + x++; + x = 0; + ++x; + + x = 0; + x--; + x = 0; + --x; + + x = 1; + x++; + x = 1; + ++x; + + x = 1; + x--; + x = 1; + --x; + + x = 127; + x++; + // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'signed char' changed the value to -128 (8-bit, signed) + x = 127; + ++x; + // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'signed char' changed the value to -128 (8-bit, signed) + + x = 127; + x--; + x = 127; + --x; +} + +int main() { + test_unsigned(); + test_signed(); + + return 0; +} From 3edf2eb897e4fe0795253e8e8c1e62b93bac60c9 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Wed, 27 Nov 2019 13:44:06 +0100 Subject: [PATCH 104/591] [Frontend] Clean up some dead code in PrecompiledPreamble. 
NFC --- .../clang/Frontend/PrecompiledPreamble.h | 8 -------- clang/lib/Frontend/PrecompiledPreamble.cpp | 17 +++-------------- 2 files changed, 3 insertions(+), 22 deletions(-) diff --git a/clang/include/clang/Frontend/PrecompiledPreamble.h b/clang/include/clang/Frontend/PrecompiledPreamble.h index 1a8a64951ec49..5ae77735576cd 100644 --- a/clang/include/clang/Frontend/PrecompiledPreamble.h +++ b/clang/include/clang/Frontend/PrecompiledPreamble.h @@ -134,14 +134,6 @@ class PrecompiledPreamble { // A main method used to construct TempPCHFile. static llvm::ErrorOr CreateNewPreamblePCHFile(); - /// Call llvm::sys::fs::createTemporaryFile to create a new temporary file. - static llvm::ErrorOr createInSystemTempDir(const Twine &Prefix, - StringRef Suffix); - /// Create a new instance of TemporaryFile for file at \p Path. Use with - /// extreme caution, there's an assertion checking that there's only a - /// single instance of TempPCHFile alive for each path. - static llvm::ErrorOr createFromCustomPath(const Twine &Path); - private: TempPCHFile(std::string FilePath); diff --git a/clang/lib/Frontend/PrecompiledPreamble.cpp b/clang/lib/Frontend/PrecompiledPreamble.cpp index ced32c670288e..0e5a8e504dc59 100644 --- a/clang/lib/Frontend/PrecompiledPreamble.cpp +++ b/clang/lib/Frontend/PrecompiledPreamble.cpp @@ -535,21 +535,15 @@ PrecompiledPreamble::TempPCHFile::CreateNewPreamblePCHFile() { // FIXME: This is a hack so that we can override the preamble file during // crash-recovery testing, which is the only case where the preamble files // are not necessarily cleaned up. 
- const char *TmpFile = ::getenv("CINDEXTEST_PREAMBLE_FILE"); - if (TmpFile) - return TempPCHFile::createFromCustomPath(TmpFile); - return TempPCHFile::createInSystemTempDir("preamble", "pch"); -} + if (const char *TmpFile = ::getenv("CINDEXTEST_PREAMBLE_FILE")) + return TempPCHFile(TmpFile); -llvm::ErrorOr -PrecompiledPreamble::TempPCHFile::createInSystemTempDir(const Twine &Prefix, - StringRef Suffix) { llvm::SmallString<64> File; // Using a version of createTemporaryFile with a file descriptor guarantees // that we would never get a race condition in a multi-threaded setting // (i.e., multiple threads getting the same temporary path). int FD; - auto EC = llvm::sys::fs::createTemporaryFile(Prefix, Suffix, FD, File); + auto EC = llvm::sys::fs::createTemporaryFile("preamble", "pch", FD, File); if (EC) return EC; // We only needed to make sure the file exists, close the file right away. @@ -557,11 +551,6 @@ PrecompiledPreamble::TempPCHFile::createInSystemTempDir(const Twine &Prefix, return TempPCHFile(std::move(File).str()); } -llvm::ErrorOr -PrecompiledPreamble::TempPCHFile::createFromCustomPath(const Twine &Path) { - return TempPCHFile(Path.str()); -} - PrecompiledPreamble::TempPCHFile::TempPCHFile(std::string FilePath) : FilePath(std::move(FilePath)) { TemporaryFiles::getInstance().addFile(*this->FilePath); From a29aa47106205ec95c12e0ebac4260c5de878a6a Mon Sep 17 00:00:00 2001 From: Anastasia Stulova Date: Wed, 27 Nov 2019 11:03:11 +0000 Subject: [PATCH 105/591] [OpenCL] Move addr space deduction to Sema. In order to simplify implementation we are moving add space deduction into Sema while constructing variable declaration and on template instantiation. Pointee are deduced to generic addr space during creation of types. This commit also - fixed addr space dedution for auto type; - factors out in a separate helper function OpenCL specific logic from type diagnostics in var decl. 
Tags: #clang Differential Revision: https://reviews.llvm.org/D65744 --- clang/include/clang/AST/Type.h | 6 + clang/include/clang/Sema/Sema.h | 2 + clang/lib/AST/Expr.cpp | 2 +- clang/lib/Sema/SemaDecl.cpp | 218 +++++++++++------- clang/lib/Sema/SemaTemplateInstantiate.cpp | 8 +- .../lib/Sema/SemaTemplateInstantiateDecl.cpp | 3 + clang/lib/Sema/SemaType.cpp | 155 ++----------- clang/lib/Sema/TreeTransform.h | 17 -- clang/test/SemaOpenCL/event_t.cl | 4 +- clang/test/SemaOpenCL/invalid-block.cl | 4 +- clang/test/SemaOpenCL/invalid-pipes-cl2.0.cl | 2 +- clang/test/SemaOpenCL/sampler_t.cl | 5 +- .../SemaOpenCLCXX/address-space-deduction.cl | 28 ++- clang/test/SemaOpenCLCXX/addrspace-auto.cl | 35 +++ clang/test/SemaOpenCLCXX/restricted.cl | 4 +- 15 files changed, 236 insertions(+), 257 deletions(-) create mode 100644 clang/test/SemaOpenCLCXX/addrspace-auto.cl diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index ecbbd73e19fb4..05e78aa78236f 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -2069,6 +2069,8 @@ class alignas(8) Type : public ExtQualsTypeCommonBase { bool isAlignValT() const; // C++17 std::align_val_t bool isStdByteType() const; // C++17 std::byte bool isAtomicType() const; // C11 _Atomic() + bool isUndeducedAutoType() const; // C++11 auto or + // C++14 decltype(auto) #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ bool is##Id##Type() const; @@ -6509,6 +6511,10 @@ inline bool Type::isAtomicType() const { return isa(CanonicalType); } +inline bool Type::isUndeducedAutoType() const { + return isa(CanonicalType); +} + inline bool Type::isObjCQualifiedIdType() const { if (const auto *OPT = getAs()) return OPT->isObjCQualifiedIdType(); diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 0a6f58a484ae3..ac5a4953e00d7 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -8760,6 +8760,8 @@ class Sema final { bool 
CheckARCMethodDecl(ObjCMethodDecl *method); bool inferObjCARCLifetime(ValueDecl *decl); + void deduceOpenCLAddressSpace(ValueDecl *decl); + ExprResult HandleExprPropertyRefExpr(const ObjCObjectPointerType *OPT, Expr *BaseExpr, diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 3f722f8fd541e..322b3a7fa7400 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -1814,7 +1814,7 @@ bool CastExpr::CastConsistency() const { auto Ty = getType(); auto SETy = getSubExpr()->getType(); assert(getValueKindForType(Ty) == Expr::getValueKindForType(SETy)); - if (/*isRValue()*/ !Ty->getPointeeType().isNull()) { + if (isRValue()) { Ty = Ty->getPointeeType(); SETy = SETy->getPointeeType(); } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 6ea4923dc2ba2..dffb460cedc9b 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -6117,6 +6117,22 @@ bool Sema::inferObjCARCLifetime(ValueDecl *decl) { return false; } +void Sema::deduceOpenCLAddressSpace(ValueDecl *Decl) { + if (Decl->getType().getQualifiers().hasAddressSpace()) + return; + if (VarDecl *Var = dyn_cast(Decl)) { + QualType Type = Var->getType(); + if (Type->isSamplerT() || Type->isVoidType()) + return; + LangAS ImplAS = LangAS::opencl_private; + if ((getLangOpts().OpenCLCPlusPlus || getLangOpts().OpenCLVersion >= 200) && + Var->hasGlobalStorage()) + ImplAS = LangAS::opencl_global; + Type = Context.getAddrSpaceQualType(Type, ImplAS); + Decl->setType(Type); + } +} + static void checkAttributesAfterMerging(Sema &S, NamedDecl &ND) { // Ensure that an auto decl is deduced otherwise the checks below might cache // the wrong linkage. @@ -6474,6 +6490,105 @@ static bool isDeclExternC(const Decl *D) { llvm_unreachable("Unknown type of decl!"); } +/// Returns true if there hasn't been any invalid type diagnosed. 
+static bool diagnoseOpenCLTypes(Scope *S, Sema &Se, Declarator &D, + DeclContext *DC, QualType R) { + // OpenCL v2.0 s6.9.b - Image type can only be used as a function argument. + // OpenCL v2.0 s6.13.16.1 - Pipe type can only be used as a function + // argument. + if (R->isImageType() || R->isPipeType()) { + Se.Diag(D.getIdentifierLoc(), + diag::err_opencl_type_can_only_be_used_as_function_parameter) + << R; + D.setInvalidType(); + return false; + } + + // OpenCL v1.2 s6.9.r: + // The event type cannot be used to declare a program scope variable. + // OpenCL v2.0 s6.9.q: + // The clk_event_t and reserve_id_t types cannot be declared in program + // scope. + if (NULL == S->getParent()) { + if (R->isReserveIDT() || R->isClkEventT() || R->isEventT()) { + Se.Diag(D.getIdentifierLoc(), + diag::err_invalid_type_for_program_scope_var) + << R; + D.setInvalidType(); + return false; + } + } + + // OpenCL v1.0 s6.8.a.3: Pointers to functions are not allowed. + QualType NR = R; + while (NR->isPointerType()) { + if (NR->isFunctionPointerType()) { + Se.Diag(D.getIdentifierLoc(), diag::err_opencl_function_pointer); + D.setInvalidType(); + return false; + } + NR = NR->getPointeeType(); + } + + if (!Se.getOpenCLOptions().isEnabled("cl_khr_fp16")) { + // OpenCL v1.2 s6.1.1.1: reject declaring variables of the half and + // half array type (unless the cl_khr_fp16 extension is enabled). + if (Se.Context.getBaseElementType(R)->isHalfType()) { + Se.Diag(D.getIdentifierLoc(), diag::err_opencl_half_declaration) << R; + D.setInvalidType(); + return false; + } + } + + // OpenCL v1.2 s6.9.r: + // The event type cannot be used with the __local, __constant and __global + // address space qualifiers. + if (R->isEventT()) { + if (R.getAddressSpace() != LangAS::opencl_private) { + Se.Diag(D.getBeginLoc(), diag::err_event_t_addr_space_qual); + D.setInvalidType(); + return false; + } + } + + // C++ for OpenCL does not allow the thread_local storage qualifier. 
+ // OpenCL C does not support thread_local either, and + // also reject all other thread storage class specifiers. + DeclSpec::TSCS TSC = D.getDeclSpec().getThreadStorageClassSpec(); + if (TSC != TSCS_unspecified) { + bool IsCXX = Se.getLangOpts().OpenCLCPlusPlus; + Se.Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(), + diag::err_opencl_unknown_type_specifier) + << IsCXX << Se.getLangOpts().getOpenCLVersionTuple().getAsString() + << DeclSpec::getSpecifierName(TSC) << 1; + D.setInvalidType(); + return false; + } + + if (R->isSamplerT()) { + // OpenCL v1.2 s6.9.b p4: + // The sampler type cannot be used with the __local and __global address + // space qualifiers. + if (R.getAddressSpace() == LangAS::opencl_local || + R.getAddressSpace() == LangAS::opencl_global) { + Se.Diag(D.getIdentifierLoc(), diag::err_wrong_sampler_addressspace); + D.setInvalidType(); + } + + // OpenCL v1.2 s6.12.14.1: + // A global sampler must be declared with either the constant address + // space qualifier or with the const qualifier. + if (DC->isTranslationUnit() && + !(R.getAddressSpace() == LangAS::opencl_constant || + R.isConstQualified())) { + Se.Diag(D.getIdentifierLoc(), diag::err_opencl_nonconst_global_sampler); + D.setInvalidType(); + } + if (D.isInvalidType()) + return false; + } + return true; +} NamedDecl *Sema::ActOnVariableDeclarator( Scope *S, Declarator &D, DeclContext *DC, TypeSourceInfo *TInfo, @@ -6497,95 +6612,6 @@ NamedDecl *Sema::ActOnVariableDeclarator( return nullptr; } - if (getLangOpts().OpenCL) { - // OpenCL v2.0 s6.9.b - Image type can only be used as a function argument. - // OpenCL v2.0 s6.13.16.1 - Pipe type can only be used as a function - // argument. - if (R->isImageType() || R->isPipeType()) { - Diag(D.getIdentifierLoc(), - diag::err_opencl_type_can_only_be_used_as_function_parameter) - << R; - D.setInvalidType(); - return nullptr; - } - - // OpenCL v1.2 s6.9.r: - // The event type cannot be used to declare a program scope variable. 
- // OpenCL v2.0 s6.9.q: - // The clk_event_t and reserve_id_t types cannot be declared in program scope. - if (NULL == S->getParent()) { - if (R->isReserveIDT() || R->isClkEventT() || R->isEventT()) { - Diag(D.getIdentifierLoc(), - diag::err_invalid_type_for_program_scope_var) << R; - D.setInvalidType(); - return nullptr; - } - } - - // OpenCL v1.0 s6.8.a.3: Pointers to functions are not allowed. - QualType NR = R; - while (NR->isPointerType()) { - if (NR->isFunctionPointerType()) { - Diag(D.getIdentifierLoc(), diag::err_opencl_function_pointer); - D.setInvalidType(); - break; - } - NR = NR->getPointeeType(); - } - - if (!getOpenCLOptions().isEnabled("cl_khr_fp16")) { - // OpenCL v1.2 s6.1.1.1: reject declaring variables of the half and - // half array type (unless the cl_khr_fp16 extension is enabled). - if (Context.getBaseElementType(R)->isHalfType()) { - Diag(D.getIdentifierLoc(), diag::err_opencl_half_declaration) << R; - D.setInvalidType(); - } - } - - if (R->isSamplerT()) { - // OpenCL v1.2 s6.9.b p4: - // The sampler type cannot be used with the __local and __global address - // space qualifiers. - if (R.getAddressSpace() == LangAS::opencl_local || - R.getAddressSpace() == LangAS::opencl_global) { - Diag(D.getIdentifierLoc(), diag::err_wrong_sampler_addressspace); - } - - // OpenCL v1.2 s6.12.14.1: - // A global sampler must be declared with either the constant address - // space qualifier or with the const qualifier. - if (DC->isTranslationUnit() && - !(R.getAddressSpace() == LangAS::opencl_constant || - R.isConstQualified())) { - Diag(D.getIdentifierLoc(), diag::err_opencl_nonconst_global_sampler); - D.setInvalidType(); - } - } - - // OpenCL v1.2 s6.9.r: - // The event type cannot be used with the __local, __constant and __global - // address space qualifiers. 
- if (R->isEventT()) { - if (R.getAddressSpace() != LangAS::opencl_private) { - Diag(D.getBeginLoc(), diag::err_event_t_addr_space_qual); - D.setInvalidType(); - } - } - - // C++ for OpenCL does not allow the thread_local storage qualifier. - // OpenCL C does not support thread_local either, and - // also reject all other thread storage class specifiers. - DeclSpec::TSCS TSC = D.getDeclSpec().getThreadStorageClassSpec(); - if (TSC != TSCS_unspecified) { - bool IsCXX = getLangOpts().OpenCLCPlusPlus; - Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(), - diag::err_opencl_unknown_type_specifier) - << IsCXX << getLangOpts().getOpenCLVersionTuple().getAsString() - << DeclSpec::getSpecifierName(TSC) << 1; - D.setInvalidType(); - return nullptr; - } - } DeclSpec::SCS SCSpec = D.getDeclSpec().getStorageClassSpec(); StorageClass SC = StorageClassSpecToVarDeclStorageClass(D.getDeclSpec()); @@ -6942,6 +6968,13 @@ NamedDecl *Sema::ActOnVariableDeclarator( } } + if (getLangOpts().OpenCL) { + + deduceOpenCLAddressSpace(NewVD); + + diagnoseOpenCLTypes(S, *this, D, DC, NewVD->getType()); + } + // Handle attributes prior to checking for duplicates in MergeVarDecl ProcessDeclAttributes(S, NewVD, D); @@ -11285,6 +11318,9 @@ bool Sema::DeduceVariableDeclarationType(VarDecl *VDecl, bool DirectInit, if (getLangOpts().ObjCAutoRefCount && inferObjCARCLifetime(VDecl)) VDecl->setInvalidDecl(); + if (getLangOpts().OpenCL) + deduceOpenCLAddressSpace(VDecl); + // If this is a redeclaration, check that the type we just deduced matches // the previously declared type. 
if (VarDecl *Old = VDecl->getPreviousDecl()) { @@ -13107,6 +13143,10 @@ Decl *Sema::ActOnParamDeclarator(Scope *S, Declarator &D) { if (New->hasAttr()) { Diag(New->getLocation(), diag::err_block_on_nonlocal); } + + if (getLangOpts().OpenCL) + deduceOpenCLAddressSpace(New); + return New; } diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 0daa33cfbef55..d75be4be988c4 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -1514,8 +1514,12 @@ TemplateInstantiator::TransformFunctionTypeParam(ParmVarDecl *OldParm, int indexAdjustment, Optional NumExpansions, bool ExpectParameterPack) { - return SemaRef.SubstParmVarDecl(OldParm, TemplateArgs, indexAdjustment, - NumExpansions, ExpectParameterPack); + auto NewParm = + SemaRef.SubstParmVarDecl(OldParm, TemplateArgs, indexAdjustment, + NumExpansions, ExpectParameterPack); + if (NewParm && SemaRef.getLangOpts().OpenCL) + SemaRef.deduceOpenCLAddressSpace(NewParm); + return NewParm; } QualType diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index a2fd8a92dd61e..9a6c7b5277b58 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -930,6 +930,9 @@ Decl *TemplateDeclInstantiator::VisitVarDecl(VarDecl *D, SemaRef.inferObjCARCLifetime(Var)) Var->setInvalidDecl(); + if (SemaRef.getLangOpts().OpenCL) + SemaRef.deduceOpenCLAddressSpace(Var); + // Substitute the nested name specifier, if any. if (SubstQualifier(D, Var)) return nullptr; diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index b87978035ad8b..2f5fdfb1f9120 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -1976,6 +1976,19 @@ bool Sema::CheckQualifiedFunctionForTypeId(QualType T, SourceLocation Loc) { return true; } +// Helper to deduce addr space of a pointee type in OpenCL mode. 
+static QualType deduceOpenCLPointeeAddrSpace(Sema &S, QualType PointeeType) { + if (!PointeeType->isUndeducedAutoType() && !PointeeType->isDependentType() && + !PointeeType->isSamplerT() && + !PointeeType.getQualifiers().hasAddressSpace()) + PointeeType = S.getASTContext().getAddrSpaceQualType( + PointeeType, + S.getLangOpts().OpenCLCPlusPlus || S.getLangOpts().OpenCLVersion == 200 + ? LangAS::opencl_generic + : LangAS::opencl_private); + return PointeeType; +} + /// Build a pointer type. /// /// \param T The type to which we'll be building a pointer. @@ -2012,6 +2025,9 @@ QualType Sema::BuildPointerType(QualType T, if (getLangOpts().ObjCAutoRefCount) T = inferARCLifetimeForPointee(*this, T, Loc, /*reference*/ false); + if (getLangOpts().OpenCL) + T = deduceOpenCLPointeeAddrSpace(*this, T); + // Build the pointer type. return Context.getPointerType(T); } @@ -2072,6 +2088,9 @@ QualType Sema::BuildReferenceType(QualType T, bool SpelledAsLValue, if (getLangOpts().ObjCAutoRefCount) T = inferARCLifetimeForPointee(*this, T, Loc, /*reference*/ true); + if (getLangOpts().OpenCL) + T = deduceOpenCLPointeeAddrSpace(*this, T); + // Handle restrict on references. if (LValueRef) return Context.getLValueReferenceType(T, SpelledAsLValue); @@ -2655,6 +2674,9 @@ QualType Sema::BuildBlockPointerType(QualType T, if (checkQualifiedFunction(*this, T, Loc, QFK_BlockPointer)) return QualType(); + if (getLangOpts().OpenCL) + T = deduceOpenCLPointeeAddrSpace(*this, T); + return Context.getBlockPointerType(T); } @@ -7369,137 +7391,6 @@ static void HandleOpenCLAccessAttr(QualType &CurType, const ParsedAttr &Attr, } } -static void deduceOpenCLImplicitAddrSpace(TypeProcessingState &State, - QualType &T, TypeAttrLocation TAL) { - Declarator &D = State.getDeclarator(); - - // Handle the cases where address space should not be deduced. - // - // The pointee type of a pointer type is always deduced since a pointer always - // points to some memory location which should has an address space. 
- // - // There are situations that at the point of certain declarations, the address - // space may be unknown and better to be left as default. For example, when - // defining a typedef or struct type, they are not associated with any - // specific address space. Later on, they may be used with any address space - // to declare a variable. - // - // The return value of a function is r-value, therefore should not have - // address space. - // - // The void type does not occupy memory, therefore should not have address - // space, except when it is used as a pointee type. - // - // Since LLVM assumes function type is in default address space, it should not - // have address space. - auto ChunkIndex = State.getCurrentChunkIndex(); - bool IsPointee = - ChunkIndex > 0 && - (D.getTypeObject(ChunkIndex - 1).Kind == DeclaratorChunk::Pointer || - D.getTypeObject(ChunkIndex - 1).Kind == DeclaratorChunk::Reference || - D.getTypeObject(ChunkIndex - 1).Kind == DeclaratorChunk::BlockPointer); - // For pointers/references to arrays the next chunk is always an array - // followed by any number of parentheses. - if (!IsPointee && ChunkIndex > 1) { - auto AdjustedCI = ChunkIndex - 1; - if (D.getTypeObject(AdjustedCI).Kind == DeclaratorChunk::Array) - AdjustedCI--; - // Skip over all parentheses. - while (AdjustedCI > 0 && - D.getTypeObject(AdjustedCI).Kind == DeclaratorChunk::Paren) - AdjustedCI--; - if (D.getTypeObject(AdjustedCI).Kind == DeclaratorChunk::Pointer || - D.getTypeObject(AdjustedCI).Kind == DeclaratorChunk::Reference) - IsPointee = true; - } - bool IsFuncReturnType = - ChunkIndex > 0 && - D.getTypeObject(ChunkIndex - 1).Kind == DeclaratorChunk::Function; - bool IsFuncType = - ChunkIndex < D.getNumTypeObjects() && - D.getTypeObject(ChunkIndex).Kind == DeclaratorChunk::Function; - if ( // Do not deduce addr space for function return type and function type, - // otherwise it will fail some sema check. 
- IsFuncReturnType || IsFuncType || - // Do not deduce addr space for member types of struct, except the pointee - // type of a pointer member type or static data members. - (D.getContext() == DeclaratorContext::MemberContext && - (!IsPointee && - D.getDeclSpec().getStorageClassSpec() != DeclSpec::SCS_static)) || - // Do not deduce addr space of non-pointee in type alias because it - // doesn't define any object. - (D.getContext() == DeclaratorContext::AliasDeclContext && !IsPointee) || - // Do not deduce addr space for types used to define a typedef and the - // typedef itself, except the pointee type of a pointer type which is used - // to define the typedef. - (D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_typedef && - !IsPointee) || - // Do not deduce addr space of the void type, e.g. in f(void), otherwise - // it will fail some sema check. - (T->isVoidType() && !IsPointee) || - // Do not deduce addr spaces for dependent types because they might end - // up instantiating to a type with an explicit address space qualifier. - // Except for pointer or reference types because the addr space in - // template argument can only belong to a pointee. - (T->isDependentType() && !T->isPointerType() && !T->isReferenceType()) || - // Do not deduce addr space of decltype because it will be taken from - // its argument. - T->isDecltypeType() || - // OpenCL spec v2.0 s6.9.b: - // The sampler type cannot be used with the __local and __global address - // space qualifiers. - // OpenCL spec v2.0 s6.13.14: - // Samplers can also be declared as global constants in the program - // source using the following syntax. - // const sampler_t = - // In codegen, file-scope sampler type variable has special handing and - // does not rely on address space qualifier. 
On the other hand, deducing - // address space of const sampler file-scope variable as global address - // space causes spurious diagnostic about __global address space - // qualifier, therefore do not deduce address space of file-scope sampler - // type variable. - (D.getContext() == DeclaratorContext::FileContext && T->isSamplerT())) - return; - - LangAS ImpAddr = LangAS::Default; - // Put OpenCL automatic variable in private address space. - // OpenCL v1.2 s6.5: - // The default address space name for arguments to a function in a - // program, or local variables of a function is __private. All function - // arguments shall be in the __private address space. - if (State.getSema().getLangOpts().OpenCLVersion <= 120 && - !State.getSema().getLangOpts().OpenCLCPlusPlus) { - ImpAddr = LangAS::opencl_private; - } else { - // If address space is not set, OpenCL 2.0 defines non private default - // address spaces for some cases: - // OpenCL 2.0, section 6.5: - // The address space for a variable at program scope or a static variable - // inside a function can either be __global or __constant, but defaults to - // __global if not specified. - // (...) - // Pointers that are declared without pointing to a named address space - // point to the generic address space. - if (IsPointee) { - ImpAddr = LangAS::opencl_generic; - } else { - if (D.getContext() == DeclaratorContext::TemplateArgContext) { - // Do not deduce address space for non-pointee type in template arg. 
- } else if (D.getContext() == DeclaratorContext::FileContext) { - ImpAddr = LangAS::opencl_global; - } else { - if (D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_static || - D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_extern) { - ImpAddr = LangAS::opencl_global; - } else { - ImpAddr = LangAS::opencl_private; - } - } - } - } - T = State.getSema().Context.getAddrSpaceQualType(T, ImpAddr); -} - static void HandleLifetimeBoundAttr(TypeProcessingState &State, QualType &CurType, ParsedAttr &Attr) { @@ -7729,8 +7620,6 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type, if (!state.getSema().getLangOpts().OpenCL || type.getAddressSpace() != LangAS::Default) return; - - deduceOpenCLImplicitAddrSpace(state, type, TAL); } void Sema::completeExprArrayBound(Expr *E) { diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 28c5738eb2660..47bd98a850302 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -4579,14 +4579,6 @@ QualType TreeTransform::TransformDecayedType(TypeLocBuilder &TLB, return Result; } -/// Helper to deduce addr space of a pointee type in OpenCL mode. -/// If the type is updated it will be overwritten in PointeeType param. 
-inline void deduceOpenCLPointeeAddrSpace(Sema &SemaRef, QualType &PointeeType) { - if (PointeeType.getAddressSpace() == LangAS::Default) - PointeeType = SemaRef.Context.getAddrSpaceQualType(PointeeType, - LangAS::opencl_generic); -} - template QualType TreeTransform::TransformPointerType(TypeLocBuilder &TLB, PointerTypeLoc TL) { @@ -4595,9 +4587,6 @@ QualType TreeTransform::TransformPointerType(TypeLocBuilder &TLB, if (PointeeType.isNull()) return QualType(); - if (SemaRef.getLangOpts().OpenCL) - deduceOpenCLPointeeAddrSpace(SemaRef, PointeeType); - QualType Result = TL.getType(); if (PointeeType->getAs()) { // A dependent pointer type 'T *' has is being transformed such @@ -4636,9 +4625,6 @@ TreeTransform::TransformBlockPointerType(TypeLocBuilder &TLB, if (PointeeType.isNull()) return QualType(); - if (SemaRef.getLangOpts().OpenCL) - deduceOpenCLPointeeAddrSpace(SemaRef, PointeeType); - QualType Result = TL.getType(); if (getDerived().AlwaysRebuild() || PointeeType != TL.getPointeeLoc().getType()) { @@ -4668,9 +4654,6 @@ TreeTransform::TransformReferenceType(TypeLocBuilder &TLB, if (PointeeType.isNull()) return QualType(); - if (SemaRef.getLangOpts().OpenCL) - deduceOpenCLPointeeAddrSpace(SemaRef, PointeeType); - QualType Result = TL.getType(); if (getDerived().AlwaysRebuild() || PointeeType != T->getPointeeTypeAsWritten()) { diff --git a/clang/test/SemaOpenCL/event_t.cl b/clang/test/SemaOpenCL/event_t.cl index e7daf88576cc5..ab7f09170e9cf 100644 --- a/clang/test/SemaOpenCL/event_t.cl +++ b/clang/test/SemaOpenCL/event_t.cl @@ -1,6 +1,6 @@ // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -event_t glb_evt; // expected-error {{the 'event_t' type cannot be used to declare a program scope variable}} +event_t glb_evt; // expected-error {{the 'event_t' type cannot be used to declare a program scope variable}} expected-error{{program scope variable must reside in constant address space}} constant struct evt_s { event_t evt; // expected-error {{the 'event_t' type 
cannot be used to declare a structure or union field}} @@ -10,7 +10,7 @@ void foo(event_t evt); // expected-note {{passing argument to parameter 'evt' he void kernel ker(event_t argevt) { // expected-error {{'event_t' cannot be used as the type of a kernel parameter}} event_t e; - constant event_t const_evt; // expected-error {{the event_t type can only be used with __private address space qualifier}} + constant event_t const_evt; // expected-error {{the event_t type can only be used with __private address space qualifier}} expected-error{{variable in constant address space must be initialized}} foo(e); foo(0); foo(5); // expected-error {{passing 'int' to parameter of incompatible type 'event_t'}} diff --git a/clang/test/SemaOpenCL/invalid-block.cl b/clang/test/SemaOpenCL/invalid-block.cl index 5d6dc380a37a1..7cbcea96d0acf 100644 --- a/clang/test/SemaOpenCL/invalid-block.cl +++ b/clang/test/SemaOpenCL/invalid-block.cl @@ -58,11 +58,11 @@ void f5(int i) { : bl2(i); // expected-error {{block type cannot be used as expression in ternary expression in OpenCL}} } // A block pointer type and all pointer operations are disallowed -void f6(bl2_t *bl_ptr) { // expected-error{{pointer to type '__generic bl2_t' (aka 'int (__generic ^const __generic)(int)') is invalid in OpenCL}} +void f6(bl2_t *bl_ptr) { // expected-error{{pointer to type 'bl2_t' (aka 'int (__generic ^const)(int)') is invalid in OpenCL}} bl2_t bl = ^(int i) { return 1; }; - bl2_t *p; // expected-error {{pointer to type '__generic bl2_t' (aka 'int (__generic ^const __generic)(int)') is invalid in OpenCL}} + bl2_t *p; // expected-error {{pointer to type 'bl2_t' (aka 'int (__generic ^const)(int)') is invalid in OpenCL}} *bl; // expected-error {{invalid argument type 'bl2_t' (aka 'int (__generic ^const)(int)') to unary expression}} &bl; // expected-error {{invalid argument type 'bl2_t' (aka 'int (__generic ^const)(int)') to unary expression}} } diff --git a/clang/test/SemaOpenCL/invalid-pipes-cl2.0.cl 
b/clang/test/SemaOpenCL/invalid-pipes-cl2.0.cl index 69fa2b6da823f..de1b4f8858fa0 100644 --- a/clang/test/SemaOpenCL/invalid-pipes-cl2.0.cl +++ b/clang/test/SemaOpenCL/invalid-pipes-cl2.0.cl @@ -4,7 +4,7 @@ global pipe int gp; // expected-error {{type '__global read_only pipe int' can only be used as a function parameter in OpenCL}} global reserve_id_t rid; // expected-error {{the '__global reserve_id_t' type cannot be used to declare a program scope variable}} -extern pipe write_only int get_pipe(); // expected-error-re{{type '__global write_only pipe int ({{(void)?}})' can only be used as a function parameter in OpenCL}} +extern pipe write_only int get_pipe(); // expected-error-re{{type '__global write_only pipe int ({{(void)?}})' can only be used as a function parameter in OpenCL}} expected-error{{'write_only' attribute only applies to parameters and typedefs}} kernel void test_invalid_reserved_id(reserve_id_t ID) { // expected-error {{'reserve_id_t' cannot be used as the type of a kernel parameter}} } diff --git a/clang/test/SemaOpenCL/sampler_t.cl b/clang/test/SemaOpenCL/sampler_t.cl index fe9d997c89607..888e973cc31d8 100644 --- a/clang/test/SemaOpenCL/sampler_t.cl +++ b/clang/test/SemaOpenCL/sampler_t.cl @@ -48,6 +48,9 @@ constant struct sampler_s { sampler_t bad(void); //expected-error{{declaring function return value of type 'sampler_t' is not allowed}} sampler_t global_nonconst_smp = 0; // expected-error {{global sampler requires a const or constant address space qualifier}} +#ifdef CHECK_SAMPLER_VALUE +// expected-warning@-2{{sampler initializer has invalid Filter Mode bits}} +#endif const sampler_t glb_smp10 = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR; const constant sampler_t glb_smp11 = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR; @@ -62,7 +65,7 @@ void kernel ker(sampler_t argsmp) { } #if __OPENCL_C_VERSION__ == 200 -void bad(sampler_t*); // expected-error{{pointer to type '__generic 
sampler_t' is invalid in OpenCL}} +void bad(sampler_t *); // expected-error{{pointer to type 'sampler_t' is invalid in OpenCL}} #else void bad(sampler_t*); // expected-error{{pointer to type 'sampler_t' is invalid in OpenCL}} #endif diff --git a/clang/test/SemaOpenCLCXX/address-space-deduction.cl b/clang/test/SemaOpenCLCXX/address-space-deduction.cl index ac6b2cabbd0cb..9bffeafb1c2db 100644 --- a/clang/test/SemaOpenCLCXX/address-space-deduction.cl +++ b/clang/test/SemaOpenCLCXX/address-space-deduction.cl @@ -65,30 +65,42 @@ template x3::x3(const x3 &t) {} template -T xxx(T *in) { +T xxx(T *in1, T in2) { // This pointer can't be deduced to generic because addr space // will be taken from the template argument. //CHECK: `-VarDecl {{.*}} i 'T *' cinit - T *i = in; + T *i = in1; T ii; + __private T *ptr = ⅈ + ptr = &in2; return *i; } __kernel void test() { int foo[10]; - xxx(&foo[0]); + xxx<__private int>(&foo[0], foo[0]); + // FIXME: Template param deduction fails here because + // temporaries are not in the __private address space. + // It is probably reasonable to put them in __private + // considering that stack and function params are + // implicitly in __private. + // However, if temporaries are left in default addr + // space we should at least pretty print the __private + // addr space. Otherwise diagnostic appears to be + // confusing. 
+ //xxx(&foo[0], foo[0]); } // Addr space for pointer/reference to an array -//CHECK: FunctionDecl {{.*}} t1 'void (const __generic float (&)[2])' +//CHECK: FunctionDecl {{.*}} t1 'void (const float (__generic &)[2])' void t1(const float (&fYZ)[2]); -//CHECK: FunctionDecl {{.*}} t2 'void (const __generic float (*)[2])' +//CHECK: FunctionDecl {{.*}} t2 'void (const float (__generic *)[2])' void t2(const float (*fYZ)[2]); -//CHECK: FunctionDecl {{.*}} t3 'void (__generic float (((*)))[2])' +//CHECK: FunctionDecl {{.*}} t3 'void (float (((__generic *)))[2])' void t3(float(((*fYZ)))[2]); -//CHECK: FunctionDecl {{.*}} t4 'void (__generic float (((*__generic *)))[2])' +//CHECK: FunctionDecl {{.*}} t4 'void (float (((__generic *__generic *)))[2])' void t4(float(((**fYZ)))[2]); -//CHECK: FunctionDecl {{.*}} t5 'void (__generic float (*__generic (*))[2])' +//CHECK: FunctionDecl {{.*}} t5 'void (float (__generic *(__generic *))[2])' void t5(float (*(*fYZ))[2]); __kernel void k() { diff --git a/clang/test/SemaOpenCLCXX/addrspace-auto.cl b/clang/test/SemaOpenCLCXX/addrspace-auto.cl new file mode 100644 index 0000000000000..56fd9eb58ddc4 --- /dev/null +++ b/clang/test/SemaOpenCLCXX/addrspace-auto.cl @@ -0,0 +1,35 @@ +//RUN: %clang_cc1 %s -cl-std=clc++ -pedantic -ast-dump -verify | FileCheck %s + +__constant int i = 1; +//CHECK: |-VarDecl {{.*}} ai '__global int':'__global int' +auto ai = i; + +kernel void test() { + int i; + //CHECK: VarDecl {{.*}} ai 'int':'int' + auto ai = i; + + constexpr int c = 1; + //CHECK: VarDecl {{.*}} used cai '__constant int':'__constant int' + __constant auto cai = c; + //CHECK: VarDecl {{.*}} aii 'int':'int' + auto aii = cai; + + //CHECK: VarDecl {{.*}} ref 'int &' + auto &ref = i; + //CHECK: VarDecl {{.*}} ptr 'int *' + auto *ptr = &i; + //CHECK: VarDecl {{.*}} ref_c '__constant int &' + auto &ref_c = cai; + + //CHECK: VarDecl {{.*}} ptrptr 'int *__generic *' + auto **ptrptr = &ptr; + //CHECK: VarDecl {{.*}} refptr 'int *__generic &' + auto 
*&refptr = ptr; + + //CHECK: VarDecl {{.*}} invalid gref '__global auto &' + __global auto &gref = i; //expected-error{{variable 'gref' with type '__global auto &' has incompatible initializer of type 'int'}} + __local int *ptr_l; + //CHECK: VarDecl {{.*}} invalid gptr '__global auto *' + __global auto *gptr = ptr_l; //expected-error{{variable 'gptr' with type '__global auto *' has incompatible initializer of type '__local int *'}} +} diff --git a/clang/test/SemaOpenCLCXX/restricted.cl b/clang/test/SemaOpenCLCXX/restricted.cl index fc4938df5bf1e..c00c634073fe7 100644 --- a/clang/test/SemaOpenCLCXX/restricted.cl +++ b/clang/test/SemaOpenCLCXX/restricted.cl @@ -32,12 +32,14 @@ B *test_dynamic_cast(B *p) { __constant _Thread_local int a = 1; // expected-error@-1 {{C++ for OpenCL version 1.0 does not support the '_Thread_local' storage class specifier}} // expected-warning@-2 {{'_Thread_local' is a C11 extension}} - +// expected-error@-3 {{thread-local storage is not supported for the current target}} __constant __thread int b = 2; // expected-error@-1 {{C++ for OpenCL version 1.0 does not support the '__thread' storage class specifier}} +// expected-error@-2 {{thread-local storage is not supported for the current target}} kernel void test_storage_classes() { register int x; // expected-error@-1 {{C++ for OpenCL version 1.0 does not support the 'register' storage class specifier}} thread_local int y; // expected-error@-1 {{C++ for OpenCL version 1.0 does not support the 'thread_local' storage class specifier}} + // expected-error@-2 {{thread-local storage is not supported for the current target}} } From 870f3542d3e0d06d208442bdca6482866b59171b Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Wed, 27 Nov 2019 16:05:02 +0300 Subject: [PATCH 106/591] [CodeGen][UBSan] Relax newly-added verbose sanitization tests for inc/dec In particular, don't hardcode the signature of the handler: it takes src filepath so the length of buffers will not match, --- 
...tch-implicit-integer-sign-changes-incdec.c | 32 +++++++++---------- ...plicit-signed-integer-truncations-incdec.c | 32 +++++++++---------- 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec.c b/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec.c index 3fe23ecbd8d9b..41e08ee32a525 100644 --- a/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec.c +++ b/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec.c @@ -41,8 +41,8 @@ unsigned short t0(unsigned short x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_100]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_100]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize // CHECK-SANITIZE: [[CONT]]: @@ -76,8 +76,8 @@ unsigned short t1(unsigned short x) { // CHECK-SANITIZE-TRAP-NEXT: call void 
@llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_200]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_200]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize // CHECK-SANITIZE: [[CONT]]: @@ -112,8 +112,8 @@ unsigned short t2(unsigned short x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_300]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, 
i16, [17 x i8] }*, i8 }* @[[LINE_300]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize // CHECK-SANITIZE: [[CONT]]: @@ -148,8 +148,8 @@ unsigned short t3(unsigned short x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_400]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_400]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize // 
CHECK-SANITIZE: [[CONT]]: @@ -185,8 +185,8 @@ signed short t4(signed short x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_500]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_500]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize // CHECK-SANITIZE: [[CONT]]: @@ -221,8 +221,8 @@ signed short t5(signed short x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_600]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call 
void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_600]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize // CHECK-SANITIZE: [[CONT]]: @@ -258,8 +258,8 @@ signed short t6(signed short x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_700]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_700]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize // 
CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize // CHECK-SANITIZE: [[CONT]]: @@ -295,8 +295,8 @@ signed short t7(signed short x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_800]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [91 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_800]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize // CHECK-SANITIZE: [[CONT]]: diff --git a/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec.c b/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec.c index 5e602d60faffc..1e0bad1844c50 100644 --- a/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec.c +++ b/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec.c @@ -41,8 +41,8 @@ unsigned short t0(unsigned short x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // 
CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_100]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_100]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize // CHECK-SANITIZE: [[CONT]]: @@ -76,8 +76,8 @@ unsigned short t1(unsigned short x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_200]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* 
@[[LINE_200]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize // CHECK-SANITIZE: [[CONT]]: @@ -112,8 +112,8 @@ unsigned short t2(unsigned short x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_300]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_300]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize // CHECK-SANITIZE: 
[[CONT]]: @@ -148,8 +148,8 @@ unsigned short t3(unsigned short x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_400]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [17 x i8] }*, i8 }* @[[LINE_400]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize // CHECK-SANITIZE: [[CONT]]: @@ -184,8 +184,8 @@ signed short t4(signed short x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_500]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void 
@__ubsan_handle_implicit_conversion(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_500]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize // CHECK-SANITIZE: [[CONT]]: @@ -219,8 +219,8 @@ signed short t5(signed short x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_600]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_600]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize // 
CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize // CHECK-SANITIZE: [[CONT]]: @@ -255,8 +255,8 @@ signed short t6(signed short x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_700]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_700]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize // CHECK-SANITIZE: [[CONT]]: @@ -291,8 +291,8 @@ signed short t7(signed short x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize // CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, 
[8 x i8] }*, i8 }* @[[LINE_800]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ { [97 x i8]*, i32, i32 }, { i16, i16, [6 x i8] }*, { i16, i16, [8 x i8] }*, i8 }* @[[LINE_800]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize // CHECK-SANITIZE: [[CONT]]: From 3c1912a733bae09585d88315a7eec39cd3318fde Mon Sep 17 00:00:00 2001 From: John Brawn Date: Wed, 27 Nov 2019 12:57:29 +0000 Subject: [PATCH 107/591] [ARM] Add constrained FP intrinsics test Currently XFAILed, as there are various things that need fixing. 
Differential Revision: https://reviews.llvm.org/D70599 --- llvm/test/CodeGen/ARM/fp-intrinsics.ll | 557 +++++++++++++++++++++++++ 1 file changed, 557 insertions(+) create mode 100644 llvm/test/CodeGen/ARM/fp-intrinsics.ll diff --git a/llvm/test/CodeGen/ARM/fp-intrinsics.ll b/llvm/test/CodeGen/ARM/fp-intrinsics.ll new file mode 100644 index 0000000000000..8d4a6376a9771 --- /dev/null +++ b/llvm/test/CodeGen/ARM/fp-intrinsics.ll @@ -0,0 +1,557 @@ +; RUN: llc -mtriple=armv8a-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SP,CHECK-DP +; RUN: llc -mtriple=thumbv8m.main-none-eabi %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOSP,CHECK-NODP +; RUN: llc -mtriple=thumbv8m.main-none-eabi %s -o - -mattr=fp-armv8 | FileCheck %s --check-prefixes=CHECK,CHECK-SP,CHECK-DP +; RUN: llc -mtriple=thumbv8m.main-none-eabi %s -o - -mattr=fp-armv8sp | FileCheck %s --check-prefixes=CHECK,CHECK-SP,CHECK-NODP + +; Check that constrained fp intrinsics are correctly lowered. In particular +; check that the valid combinations of single-precision and double-precision +; hardware being present or absent work as expected (i.e. we get an instruction +; when one is available, otherwise a libcall). + +; FIXME: Tests fail as various things in CodeGen and Target/ARM need fixing. 
+; XFAIL: * + + +; Single-precision intrinsics + +; CHECK-LABEL: add_f32: +; CHECK-NOSP: bl __aeabi_fadd +; CHECK-SP: vadd.f32 +define float @add_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: sub_f32: +; CHECK-NOSP: bl __aeabi_fsub +; CHECK-SP: vsub.f32 +define float @sub_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: mul_f32: +; CHECK-NOSP: bl __aeabi_fmul +; CHECK-SP: vmul.f32 +define float @mul_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: div_f32: +; CHECK-NOSP: bl __aeabi_fdiv +; CHECK-SP: vdiv.f32 +define float @div_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: frem_f32: +; CHECK: bl fmodf +define float @frem_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.frem.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: fma_f32: +; CHECK-NOSP: bl fmaf +; CHECK-SP: vfma.f32 +define float @fma_f32(float %x, float %y, float %z) #0 { + %val = call float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: fptosi_f32: +; CHECK-NOSP: bl __aeabi_f2iz +; CHECK-SP: vcvt.s32.f32 +define i32 @fptosi_f32(float %x) #0 { + %val = call i32 @llvm.experimental.constrained.fptosi.f32(float %x, metadata !"fpexcept.strict") #0 + ret i32 
%val +} + +; CHECK-LABEL: fptoui_f32: +; CHECK-NOSP: bl __aeabi_f2uiz +; CHECK-SP: vcvt.u32.f32 +define i32 @fptoui_f32(float %x) #0 { + %val = call i32 @llvm.experimental.constrained.fptoui.f32(float %x, metadata !"fpexcept.strict") #0 + ret i32 %val +} + +; CHECK-LABEL: sqrt_f32: +; CHECK-NOSP: bl sqrtf +; CHECK-SP: vsqrt.f32 +define float @sqrt_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.sqrt.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: powi_f32: +; CHECK: bl __powisf2 +define float @powi_f32(float %x, i32 %y) #0 { + %val = call float @llvm.experimental.constrained.powi.f32(float %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: sin_f32: +; CHECK: bl sinf +define float @sin_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.sin.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: cos_f32: +; CHECK: bl cosf +define float @cos_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.cos.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: pow_f32: +; CHECK: bl powf +define float @pow_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.pow.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: log_f32: +; CHECK: bl logf +define float @log_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.log.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: log10_f32: +; CHECK: bl log10f +define float @log10_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.log10.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: log2_f32: 
+; CHECK: bl log2f +define float @log2_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.log2.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: exp_f32: +; CHECK: bl expf +define float @exp_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.exp.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: exp2_f32: +; CHECK: bl exp2f +define float @exp2_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.exp2.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: rint_f32: +; CHECK-NOSP: bl rintf +; CHECK-SP: vrintx.f32 +define float @rint_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.rint.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: nearbyint_f32: +; CHECK-NOSP: bl nearbyintf +; CHECK-SP: vrintr.f32 +define float @nearbyint_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.nearbyint.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: lrint_f32: +; CHECK: bl lrintf +define i32 @lrint_f32(float %x) #0 { + %val = call i32 @llvm.experimental.constrained.lrint.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret i32 %val +} + +; CHECK-LABEL: llrint_f32: +; CHECK: bl llrintf +define i32 @llrint_f32(float %x) #0 { + %val = call i32 @llvm.experimental.constrained.llrint.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret i32 %val +} + +; CHECK-LABEL: maxnum_f32: +; CHECK-NOSP: bl fmaxf +; CHECK-SP: vmaxnm.f32 +define float @maxnum_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.maxnum.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + 
+; CHECK-LABEL: minnum_f32: +; CHECK-NOSP: bl fminf +; CHECK-SP: vminnm.f32 +define float @minnum_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.minnum.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: ceil_f32: +; CHECK-NOSP: bl ceilf +; CHECK-SP: vrintp.f32 +define float @ceil_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.ceil.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: floor_f32: +; CHECK-NOSP: bl floorf +; CHECK-SP: vrintm.f32 +define float @floor_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.floor.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: lround_f32: +; CHECK: bl lroundf +define i32 @lround_f32(float %x) #0 { + %val = call i32 @llvm.experimental.constrained.lround.f32(float %x, metadata !"fpexcept.strict") #0 + ret i32 %val +} + +; CHECK-LABEL: llround_f32: +; CHECK: bl llroundf +define i32 @llround_f32(float %x) #0 { + %val = call i32 @llvm.experimental.constrained.llround.f32(float %x, metadata !"fpexcept.strict") #0 + ret i32 %val +} + +; CHECK-LABEL: round_f32: +; CHECK-NOSP: bl roundf +; CHECK-SP: vrinta.f32 +define float @round_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.round.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: trunc_f32: +; CHECK-NOSP: bl truncf +; CHECK-SP: vrintz.f32 +define float @trunc_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.trunc.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + + +; Double-precision intrinsics + +; CHECK-LABEL: add_f64: +; CHECK-NODP: bl __aeabi_dadd +; CHECK-DP: vadd.f64 +define double @add_f64(double %x, double %y) #0 { + %val = call double 
@llvm.experimental.constrained.fadd.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: sub_f64: +; CHECK-NODP: bl __aeabi_dsub +; CHECK-DP: vsub.f64 +define double @sub_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.fsub.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: mul_f64: +; CHECK-NODP: bl __aeabi_dmul +; CHECK-DP: vmul.f64 +define double @mul_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.fmul.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: div_f64: +; CHECK-NODP: bl __aeabi_ddiv +; CHECK-DP: vdiv.f64 +define double @div_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.fdiv.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: frem_f64: +; CHECK: bl fmod +define double @frem_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.frem.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: fma_f64: +; CHECK-NODP: bl fma +; CHECK-DP: vfma.f64 +define double @fma_f64(double %x, double %y, double %z) #0 { + %val = call double @llvm.experimental.constrained.fma.f64(double %x, double %y, double %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: fptosi_f64: +; CHECK-NODP: bl __aeabi_d2iz +; CHECK-DP: vcvt.s32.f64 +define i32 @fptosi_f64(double %x) #0 { + %val = call i32 @llvm.experimental.constrained.fptosi.f64(double %x, metadata !"fpexcept.strict") #0 + ret i32 %val +} + +; CHECK-LABEL: fptoui_f64: +; CHECK-NODP: bl __aeabi_d2uiz +; CHECK-DP: vcvt.u32.f64 +define i32 @fptoui_f64(double %x) #0 { + %val = call i32 
@llvm.experimental.constrained.fptoui.f64(double %x, metadata !"fpexcept.strict") #0 + ret i32 %val +} + +; CHECK-LABEL: sqrt_f64: +; CHECK-NODP: bl sqrt +; CHECK-DP: vsqrt.f64 +define double @sqrt_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.sqrt.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: powi_f64: +; CHECK: bl __powidf2 +define double @powi_f64(double %x, i32 %y) #0 { + %val = call double @llvm.experimental.constrained.powi.f64(double %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: sin_f64: +; CHECK: bl sin +define double @sin_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.sin.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: cos_f64: +; CHECK: bl cos +define double @cos_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.cos.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: pow_f64: +; CHECK: bl pow +define double @pow_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.pow.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: log_f64: +; CHECK: bl log +define double @log_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.log.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: log10_f64: +; CHECK: bl log10 +define double @log10_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.log10.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: log2_f64: +; CHECK: bl log2 +define double @log2_f64(double %x) #0 { + %val = call double 
@llvm.experimental.constrained.log2.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: exp_f64: +; CHECK: bl exp +define double @exp_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.exp.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: exp2_f64: +; CHECK: bl exp2 +define double @exp2_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.exp2.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: rint_f64: +; CHECK-NODP: bl rint +; CHECK-DP: vrintx.f64 +define double @rint_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.rint.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: nearbyint_f64: +; CHECK-NODP: bl nearbyint +; CHECK-DP: vrintr.f64 +define double @nearbyint_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.nearbyint.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: lrint_f64: +; CHECK: bl lrint +define i32 @lrint_f64(double %x) #0 { + %val = call i32 @llvm.experimental.constrained.lrint.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret i32 %val +} + +; CHECK-LABEL: llrint_f64: +; CHECK: bl llrint +define i32 @llrint_f64(double %x) #0 { + %val = call i32 @llvm.experimental.constrained.llrint.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret i32 %val +} + +; CHECK-LABEL: maxnum_f64: +; CHECK-NODP: bl fmax +; CHECK-DP: vmaxnm.f64 +define double @maxnum_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.maxnum.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: minnum_f64: +; CHECK-NODP: bl fmin 
+; CHECK-DP: vminnm.f64 +define double @minnum_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.minnum.f64(double %x, double %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: ceil_f64: +; CHECK-NODP: bl ceil +; CHECK-DP: vrintp.f64 +define double @ceil_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.ceil.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: floor_f64: +; CHECK-NODP: bl floor +; CHECK-DP: vrintm.f64 +define double @floor_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.floor.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: lround_f64: +; CHECK: bl lround +define i32 @lround_f64(double %x) #0 { + %val = call i32 @llvm.experimental.constrained.lround.f64(double %x, metadata !"fpexcept.strict") #0 + ret i32 %val +} + +; CHECK-LABEL: llround_f64: +; CHECK: bl llround +define i32 @llround_f64(double %x) #0 { + %val = call i32 @llvm.experimental.constrained.llround.f64(double %x, metadata !"fpexcept.strict") #0 + ret i32 %val +} + +; CHECK-LABEL: round_f64: +; CHECK-NODP: bl round +; CHECK-DP: vrinta.f64 +define double @round_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.round.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: trunc_f64: +; CHECK-NODP: bl trunc +; CHECK-DP: vrintz.f64 +define double @trunc_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.trunc.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + + +; Single/Double conversion intrinsics + +; CHECK-LABEL: fptrunc_f32: +; CHECK-NODP: bl __aeabi_d2f +; CHECK-DP: vcvt.f32.f64 +define float @fptrunc_f32(double %x) #0 { + %val = call float 
@llvm.experimental.constrained.fptrunc.f32.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: fpext_f32: +; CHECK-NODP: bl __aeabi_f2d +; CHECK-DP: vcvt.f64.f32 +define double @fpext_f32(float %x) #0 { + %val = call double @llvm.experimental.constrained.fpext.f64.f32(float %x, metadata !"fpexcept.strict") #0 + ret double %val +} + + +attributes #0 = { strictfp } + +declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.frem.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata) +declare i32 @llvm.experimental.constrained.fptosi.f32(float, metadata) +declare i32 @llvm.experimental.constrained.fptoui.f32(float, metadata) +declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.powi.f32(float, i32, metadata, metadata) +declare float @llvm.experimental.constrained.sin.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.cos.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.pow.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.log.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.log10.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.log2.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.exp.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.exp2.f32(float, metadata, metadata) +declare float 
@llvm.experimental.constrained.rint.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata) +declare i32 @llvm.experimental.constrained.lrint.f32(float, metadata, metadata) +declare i32 @llvm.experimental.constrained.llrint.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.maxnum.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.minnum.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.ceil.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.floor.f32(float, metadata, metadata) +declare i32 @llvm.experimental.constrained.lround.f32(float, metadata) +declare i32 @llvm.experimental.constrained.llround.f32(float, metadata) +declare float @llvm.experimental.constrained.round.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.trunc.f32(float, metadata, metadata) + +declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata) +declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata) +declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata) +declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata) +declare double @llvm.experimental.constrained.frem.f64(double, double, metadata, metadata) +declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata) +declare i32 @llvm.experimental.constrained.fptosi.f64(double, metadata) +declare i32 @llvm.experimental.constrained.fptoui.f64(double, metadata) +declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.powi.f64(double, i32, metadata, metadata) +declare double @llvm.experimental.constrained.sin.f64(double, metadata, metadata) +declare double 
@llvm.experimental.constrained.cos.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.pow.f64(double, double, metadata, metadata) +declare double @llvm.experimental.constrained.log.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.log10.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.log2.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.exp.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.exp2.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata) +declare i32 @llvm.experimental.constrained.lrint.f64(double, metadata, metadata) +declare i32 @llvm.experimental.constrained.llrint.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.maxnum.f64(double, double, metadata, metadata) +declare double @llvm.experimental.constrained.minnum.f64(double, double, metadata, metadata) +declare double @llvm.experimental.constrained.ceil.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.floor.f64(double, metadata, metadata) +declare i32 @llvm.experimental.constrained.lround.f64(double, metadata) +declare i32 @llvm.experimental.constrained.llround.f64(double, metadata) +declare double @llvm.experimental.constrained.round.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.trunc.f64(double, metadata, metadata) + +declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) From 9f15fcc2718f95f1dac9e6e57aa93d84e9709930 Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 27 Nov 2019 11:01:27 +0000 Subject: [PATCH 108/591] [ARM] Replace arm_neon_vqadds with sadd_sat This replaces the A32 NEON vqadds, vqaddu, 
vqsubs and vqsubu intrinsics with the target independent sadd_sat, uadd_sat, ssub_sat and usub_sat. This helps generate vqadds from standard IR nodes, which might be produced from the vectoriser. The old variants are removed in the process. Differential Revision: https://reviews.llvm.org/D69350 --- clang/lib/CodeGen/CGBuiltin.cpp | 12 +- .../test/CodeGen/arm-v8.1a-neon-intrinsics.c | 32 +- clang/test/CodeGen/arm_neon_intrinsics.c | 88 ++--- llvm/include/llvm/IR/IntrinsicsARM.td | 4 - llvm/lib/IR/AutoUpgrade.cpp | 20 ++ llvm/lib/Target/ARM/ARMISelLowering.cpp | 3 + llvm/lib/Target/ARM/ARMInstrNEON.td | 56 +-- llvm/test/CodeGen/ARM/addsubo-legalization.ll | 110 +----- llvm/test/CodeGen/ARM/neon-v8.1a.ll | 48 +-- .../test/CodeGen/ARM/neon-vqaddsub-upgrade.ll | 330 ++++++++++++++++++ llvm/test/CodeGen/ARM/vmul.ll | 4 +- llvm/test/CodeGen/ARM/vqadd.ll | 64 ++-- llvm/test/CodeGen/ARM/vqdmul.ll | 24 +- llvm/test/CodeGen/ARM/vqsub.ll | 64 ++-- 14 files changed, 567 insertions(+), 292 deletions(-) create mode 100644 llvm/test/CodeGen/ARM/neon-vqaddsub-upgrade.ll diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index ecac9aee5c7c0..26044f53e4965 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4621,10 +4621,10 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts), NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType), NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType), - NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), - NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), - NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0), - NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0), + NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts), + NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts), + NEONMAP2(vqdmlal_v, arm_neon_vqdmull, 
sadd_sat, 0), + NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0), NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType), NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType), NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType), @@ -4642,8 +4642,8 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0), NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0), - NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), - NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), + NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts), + NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts), NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType), NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), diff --git a/clang/test/CodeGen/arm-v8.1a-neon-intrinsics.c b/clang/test/CodeGen/arm-v8.1a-neon-intrinsics.c index 6f5867b6c11f7..5462c17a1cc50 100644 --- a/clang/test/CodeGen/arm-v8.1a-neon-intrinsics.c +++ b/clang/test/CodeGen/arm-v8.1a-neon-intrinsics.c @@ -13,7 +13,7 @@ // CHECK-LABEL: test_vqrdmlah_s16 int16x4_t test_vqrdmlah_s16(int16x4_t a, int16x4_t b, int16x4_t c) { // CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) -// CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) +// CHECK-ARM: call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) // CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) // CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) @@ -23,7 +23,7 @@ int16x4_t test_vqrdmlah_s16(int16x4_t a, int16x4_t b, int16x4_t c) { // CHECK-LABEL: test_vqrdmlah_s32 int32x2_t test_vqrdmlah_s32(int32x2_t a, int32x2_t b, 
int32x2_t c) { // CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) -// CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) +// CHECK-ARM: call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) // CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) // CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) @@ -33,7 +33,7 @@ int32x2_t test_vqrdmlah_s32(int32x2_t a, int32x2_t b, int32x2_t c) { // CHECK-LABEL: test_vqrdmlahq_s16 int16x8_t test_vqrdmlahq_s16(int16x8_t a, int16x8_t b, int16x8_t c) { // CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) -// CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) +// CHECK-ARM: call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) // CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) // CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) @@ -43,7 +43,7 @@ int16x8_t test_vqrdmlahq_s16(int16x8_t a, int16x8_t b, int16x8_t c) { // CHECK-LABEL: test_vqrdmlahq_s32 int32x4_t test_vqrdmlahq_s32(int32x4_t a, int32x4_t b, int32x4_t c) { // CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) -// CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) +// CHECK-ARM: call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) // CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) // CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) @@ -54,7 +54,7 @@ int32x4_t test_vqrdmlahq_s32(int32x4_t a, int32x4_t b, int32x4_t c) { int16x4_t test_vqrdmlah_lane_s16(int16x4_t 
a, int16x4_t b, int16x4_t c) { // CHECK-ARM: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> // CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) -// CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) +// CHECK-ARM: call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) // CHECK-AARCH64: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> // CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) @@ -66,7 +66,7 @@ int16x4_t test_vqrdmlah_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) { int32x2_t test_vqrdmlah_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) { // CHECK-ARM: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> // CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) -// CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) +// CHECK-ARM: call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) // CHECK-AARCH64: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> // CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) @@ -78,7 +78,7 @@ int32x2_t test_vqrdmlah_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) { int16x8_t test_vqrdmlahq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) { // CHECK-ARM: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <8 x i32> // CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) -// CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) +// CHECK-ARM: call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) // CHECK-AARCH64: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <8 x i32> // CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) @@ 
-90,7 +90,7 @@ int16x8_t test_vqrdmlahq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) { int32x4_t test_vqrdmlahq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) { // CHECK-ARM: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <4 x i32> // CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) -// CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) +// CHECK-ARM: call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) // CHECK-AARCH64: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <4 x i32> // CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) @@ -101,7 +101,7 @@ int32x4_t test_vqrdmlahq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) { // CHECK-LABEL: test_vqrdmlsh_s16 int16x4_t test_vqrdmlsh_s16(int16x4_t a, int16x4_t b, int16x4_t c) { // CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) -// CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) +// CHECK-ARM: call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) // CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) // CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) @@ -111,7 +111,7 @@ int16x4_t test_vqrdmlsh_s16(int16x4_t a, int16x4_t b, int16x4_t c) { // CHECK-LABEL: test_vqrdmlsh_s32 int32x2_t test_vqrdmlsh_s32(int32x2_t a, int32x2_t b, int32x2_t c) { // CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) -// CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) +// CHECK-ARM: call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) // CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) // CHECK-AARCH64: call <2 
x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) @@ -121,7 +121,7 @@ int32x2_t test_vqrdmlsh_s32(int32x2_t a, int32x2_t b, int32x2_t c) { // CHECK-LABEL: test_vqrdmlshq_s16 int16x8_t test_vqrdmlshq_s16(int16x8_t a, int16x8_t b, int16x8_t c) { // CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) -// CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) +// CHECK-ARM: call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) // CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) // CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) @@ -131,7 +131,7 @@ int16x8_t test_vqrdmlshq_s16(int16x8_t a, int16x8_t b, int16x8_t c) { // CHECK-LABEL: test_vqrdmlshq_s32 int32x4_t test_vqrdmlshq_s32(int32x4_t a, int32x4_t b, int32x4_t c) { // CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) -// CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) +// CHECK-ARM: call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) // CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) // CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) @@ -142,7 +142,7 @@ int32x4_t test_vqrdmlshq_s32(int32x4_t a, int32x4_t b, int32x4_t c) { int16x4_t test_vqrdmlsh_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) { // CHECK-ARM: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> // CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) -// CHECK-ARM: call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) +// CHECK-ARM: call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) // CHECK-AARCH64: shufflevector <4 
x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> // CHECK-AARCH64: call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> {{%.*}}, <4 x i16> {{%.*}}) @@ -154,7 +154,7 @@ int16x4_t test_vqrdmlsh_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) { int32x2_t test_vqrdmlsh_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) { // CHECK-ARM: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> // CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) -// CHECK-ARM: call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) +// CHECK-ARM: call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) // CHECK-AARCH64: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> // CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}) @@ -166,7 +166,7 @@ int32x2_t test_vqrdmlsh_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) { int16x8_t test_vqrdmlshq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) { // CHECK-ARM: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <8 x i32> // CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) -// CHECK-ARM: call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) +// CHECK-ARM: call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) // CHECK-AARCH64: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <8 x i32> // CHECK-AARCH64: call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}}) @@ -178,7 +178,7 @@ int16x8_t test_vqrdmlshq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) { int32x4_t test_vqrdmlshq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) { // CHECK-ARM: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <4 x i32> // CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) -// CHECK-ARM: call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> {{%.*}}, <4 x 
i32> {{%.*}}) +// CHECK-ARM: call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) // CHECK-AARCH64: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <4 x i32> // CHECK-AARCH64: call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}) diff --git a/clang/test/CodeGen/arm_neon_intrinsics.c b/clang/test/CodeGen/arm_neon_intrinsics.c index 0ec1198f99016..9f1a64554155c 100644 --- a/clang/test/CodeGen/arm_neon_intrinsics.c +++ b/clang/test/CodeGen/arm_neon_intrinsics.c @@ -9530,7 +9530,7 @@ int32x4_t test_vqabsq_s32(int32x4_t a) { } // CHECK-LABEL: @test_vqadd_s8( -// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VQADD_V_I]] int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) { return vqadd_s8(a, b); @@ -9539,7 +9539,7 @@ int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vqadd_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQADD_V2_I]] int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) { @@ -9549,7 +9549,7 @@ int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vqadd_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x 
i8> // CHECK: ret <2 x i32> [[VQADD_V2_I]] int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) { @@ -9559,7 +9559,7 @@ int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vqadd_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %a, <1 x i64> %b) +// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.sadd.sat.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQADD_V2_I]] int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) { @@ -9567,7 +9567,7 @@ int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) { } // CHECK-LABEL: @test_vqadd_u8( -// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VQADD_V_I]] uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) { return vqadd_u8(a, b); @@ -9576,7 +9576,7 @@ uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vqadd_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQADD_V2_I]] uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) { @@ -9586,7 +9586,7 @@ uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vqadd_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x 
i32> %a, <2 x i32> %b) +// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQADD_V2_I]] uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) { @@ -9596,7 +9596,7 @@ uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) { // CHECK-LABEL: @test_vqadd_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %a, <1 x i64> %b) +// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.uadd.sat.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQADD_V2_I]] uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) { @@ -9604,7 +9604,7 @@ uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) { } // CHECK-LABEL: @test_vqaddq_s8( -// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VQADDQ_V_I]] int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) { return vqaddq_s8(a, b); @@ -9613,7 +9613,7 @@ int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vqaddq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQADDQ_V2_I]] int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) { @@ -9623,7 +9623,7 @@ int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) { // 
CHECK-LABEL: @test_vqaddq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQADDQ_V2_I]] int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) { @@ -9633,7 +9633,7 @@ int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vqaddq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %b) +// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQADDQ_V2_I]] int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) { @@ -9641,7 +9641,7 @@ int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vqaddq_u8( -// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VQADDQ_V_I]] uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) { return vqaddq_u8(a, b); @@ -9650,7 +9650,7 @@ uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) { // CHECK-LABEL: @test_vqaddq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: 
[[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQADDQ_V2_I]] uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) { @@ -9660,7 +9660,7 @@ uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vqaddq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQADDQ_V2_I]] uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) { @@ -9670,7 +9670,7 @@ uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vqaddq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %a, <2 x i64> %b) +// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQADDQ_V2_I]] uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) { @@ -9682,7 +9682,7 @@ uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t 
b, int16x4_t c) { return vqdmlal_s16(a, b, c); @@ -9693,7 +9693,7 @@ int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vqdmlal_s32(a, b, c); @@ -9705,7 +9705,7 @@ int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vqdmlal_lane_s16(a, b, c, 3); @@ -9717,7 +9717,7 @@ int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret 
<2 x i64> [[VQDMLAL_V3_I]] int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vqdmlal_lane_s32(a, b, c, 1); @@ -9732,7 +9732,7 @@ int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> // CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) -// CHECK: [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]]) +// CHECK: [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]]) // CHECK: ret <4 x i32> [[VQDMLAL_V6_I]] int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { return vqdmlal_n_s16(a, b, c); @@ -9745,7 +9745,7 @@ int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> // CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) -// CHECK: [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]]) +// CHECK: [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]]) // CHECK: ret <2 x i64> [[VQDMLAL_V4_I]] int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) { return vqdmlal_n_s32(a, b, c); @@ -9756,7 +9756,7 @@ int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] 
= call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vqdmlsl_s16(a, b, c); @@ -9767,7 +9767,7 @@ int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vqdmlsl_s32(a, b, c); @@ -9779,7 +9779,7 @@ int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vqdmlsl_lane_s16(a, b, c, 3); @@ -9791,7 +9791,7 @@ int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> 
@llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vqdmlsl_lane_s32(a, b, c, 1); @@ -9806,7 +9806,7 @@ int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> // CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) -// CHECK: [[VQDMLSL_V6_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]]) +// CHECK: [[VQDMLSL_V6_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]]) // CHECK: ret <4 x i32> [[VQDMLSL_V6_I]] int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { return vqdmlsl_n_s16(a, b, c); @@ -9819,7 +9819,7 @@ int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> // CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) -// CHECK: [[VQDMLSL_V4_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]]) +// CHECK: [[VQDMLSL_V4_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]]) // CHECK: ret <2 x i64> [[VQDMLSL_V4_I]] int64x2_t test_vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) { return vqdmlsl_n_s32(a, b, c); @@ -10968,7 +10968,7 @@ uint32x2_t test_vqshrun_n_s64(int64x2_t a) { } // CHECK-LABEL: @test_vqsub_s8( -// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> 
@llvm.ssub.sat.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VQSUB_V_I]] int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) { return vqsub_s8(a, b); @@ -10977,7 +10977,7 @@ int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) { // CHECK-LABEL: @test_vqsub_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQSUB_V2_I]] int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) { @@ -10987,7 +10987,7 @@ int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) { // CHECK-LABEL: @test_vqsub_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQSUB_V2_I]] int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) { @@ -10997,7 +10997,7 @@ int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) { // CHECK-LABEL: @test_vqsub_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %a, <1 x i64> %b) +// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.ssub.sat.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQSUB_V2_I]] int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) { @@ -11005,7 +11005,7 @@ int64x1_t test_vqsub_s64(int64x1_t a, 
int64x1_t b) { } // CHECK-LABEL: @test_vqsub_u8( -// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %a, <8 x i8> %b) +// CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> %a, <8 x i8> %b) // CHECK: ret <8 x i8> [[VQSUB_V_I]] uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) { return vqsub_u8(a, b); @@ -11014,7 +11014,7 @@ uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) { // CHECK-LABEL: @test_vqsub_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %a, <4 x i16> %b) +// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> %a, <4 x i16> %b) // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8> // CHECK: ret <4 x i16> [[VQSUB_V2_I]] uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) { @@ -11024,7 +11024,7 @@ uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) { // CHECK-LABEL: @test_vqsub_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %a, <2 x i32> %b) +// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %a, <2 x i32> %b) // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8> // CHECK: ret <2 x i32> [[VQSUB_V2_I]] uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) { @@ -11034,7 +11034,7 @@ uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) { // CHECK-LABEL: @test_vqsub_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %a, <1 x i64> %b) +// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.usub.sat.v1i64(<1 x i64> %a, <1 x i64> %b) // CHECK: 
[[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8> // CHECK: ret <1 x i64> [[VQSUB_V2_I]] uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) { @@ -11042,7 +11042,7 @@ uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) { } // CHECK-LABEL: @test_vqsubq_s8( -// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VQSUBQ_V_I]] int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) { return vqsubq_s8(a, b); @@ -11051,7 +11051,7 @@ int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) { // CHECK-LABEL: @test_vqsubq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQSUBQ_V2_I]] int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) { @@ -11061,7 +11061,7 @@ int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) { // CHECK-LABEL: @test_vqsubq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> [[VQSUBQ_V2_I]] int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) { @@ -11071,7 +11071,7 @@ int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) { // CHECK-LABEL: @test_vqsubq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = 
bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %b) +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQSUBQ_V2_I]] int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) { @@ -11079,7 +11079,7 @@ int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) { } // CHECK-LABEL: @test_vqsubq_u8( -// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %a, <16 x i8> %b) +// CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b) // CHECK: ret <16 x i8> [[VQSUBQ_V_I]] uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) { return vqsubq_u8(a, b); @@ -11088,7 +11088,7 @@ uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) { // CHECK-LABEL: @test_vqsubq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %a, <8 x i16> %b) +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b) // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8> // CHECK: ret <8 x i16> [[VQSUBQ_V2_I]] uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) { @@ -11098,7 +11098,7 @@ uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) { // CHECK-LABEL: @test_vqsubq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %a, <4 x i32> %b) +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a, <4 x i32> %b) // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8> // CHECK: ret <4 x i32> 
[[VQSUBQ_V2_I]] uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) { @@ -11108,7 +11108,7 @@ uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) { // CHECK-LABEL: @test_vqsubq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %a, <2 x i64> %b) +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %a, <2 x i64> %b) // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8> // CHECK: ret <2 x i64> [[VQSUBQ_V2_I]] uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) { diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index 10417411edca2..31069666b1e9e 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -426,8 +426,6 @@ let IntrProperties = [IntrNoMem, Commutative] in { def int_arm_neon_vhaddu : Neon_2Arg_Intrinsic; def int_arm_neon_vrhadds : Neon_2Arg_Intrinsic; def int_arm_neon_vrhaddu : Neon_2Arg_Intrinsic; - def int_arm_neon_vqadds : Neon_2Arg_Intrinsic; - def int_arm_neon_vqaddu : Neon_2Arg_Intrinsic; def int_arm_neon_vraddhn : Neon_2Arg_Narrow_Intrinsic; // Vector Multiply. @@ -459,8 +457,6 @@ let IntrProperties = [IntrNoMem, Commutative] in { // Vector Subtract. def int_arm_neon_vhsubs : Neon_2Arg_Intrinsic; def int_arm_neon_vhsubu : Neon_2Arg_Intrinsic; -def int_arm_neon_vqsubs : Neon_2Arg_Intrinsic; -def int_arm_neon_vqsubu : Neon_2Arg_Intrinsic; def int_arm_neon_vrsubhn : Neon_2Arg_Narrow_Intrinsic; // Vector Absolute Compare. 
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index d2dd2a69beab2..5aaf90df6f6e3 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -559,6 +559,26 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer); return true; } + if (Name.startswith("arm.neon.vqadds.")) { + NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat, + F->arg_begin()->getType()); + return true; + } + if (Name.startswith("arm.neon.vqaddu.")) { + NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat, + F->arg_begin()->getType()); + return true; + } + if (Name.startswith("arm.neon.vqsubs.")) { + NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat, + F->arg_begin()->getType()); + return true; + } + if (Name.startswith("arm.neon.vqsubu.")) { + NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat, + F->arg_begin()->getType()); + return true; + } if (Name.startswith("aarch64.neon.addp")) { if (F->arg_size() != 2) break; // Invalid IR. 
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index c153e786e2ddb..83a06767a57fc 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -209,6 +209,9 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, VT != MVT::v2i64 && VT != MVT::v1i64) for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) setOperationAction(Opcode, VT, Legal); + if (!VT.isFloatingPoint()) + for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}) + setOperationAction(Opcode, VT, Legal); } void ARMTargetLowering::addDRTypeForNEON(MVT VT) { diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index 94bb45bde5739..1653ce1275cf1 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -4287,10 +4287,10 @@ defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm, // VQADD : Vector Saturating Add defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, - "vqadd", "s", int_arm_neon_vqadds, 1>; + "vqadd", "s", saddsat, 1>; defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, - "vqadd", "u", int_arm_neon_vqaddu, 1>; + "vqadd", "u", uaddsat, 1>; // VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>; // VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) @@ -4527,22 +4527,22 @@ let Predicates = [HasNEON, HasV8_1a] in { defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", null_frag>; - def : Pat<(v4i16 (int_arm_neon_vqadds + def : Pat<(v4i16 (saddsat (v4i16 DPR:$src1), (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))))), (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; - def : Pat<(v2i32 (int_arm_neon_vqadds 
+ def : Pat<(v2i32 (saddsat (v2i32 DPR:$src1), (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))))), (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; - def : Pat<(v8i16 (int_arm_neon_vqadds + def : Pat<(v8i16 (saddsat (v8i16 QPR:$src1), (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))))), (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; - def : Pat<(v4i32 (int_arm_neon_vqadds + def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1), (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))))), @@ -4551,7 +4551,7 @@ let Predicates = [HasNEON, HasV8_1a] in { defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", null_frag>; - def : Pat<(v4i16 (int_arm_neon_vqadds + def : Pat<(v4i16 (saddsat (v4i16 DPR:$src1), (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), @@ -4559,7 +4559,7 @@ let Predicates = [HasNEON, HasV8_1a] in { imm:$lane)))))), (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>; - def : Pat<(v2i32 (int_arm_neon_vqadds + def : Pat<(v2i32 (saddsat (v2i32 DPR:$src1), (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), @@ -4567,7 +4567,7 @@ let Predicates = [HasNEON, HasV8_1a] in { imm:$lane)))))), (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane))>; - def : Pat<(v8i16 (int_arm_neon_vqadds + def : Pat<(v8i16 (saddsat (v8i16 QPR:$src1), (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src2), @@ -4579,7 +4579,7 @@ let Predicates = [HasNEON, HasV8_1a] in { QPR:$src3, (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane)))>; - def : Pat<(v4i32 (int_arm_neon_vqadds + def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1), (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src2), @@ -4597,22 +4597,22 @@ let Predicates = [HasNEON, HasV8_1a] in { defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", null_frag>; - def : Pat<(v4i16 (int_arm_neon_vqsubs + def : Pat<(v4i16 (ssubsat (v4i16 DPR:$src1), (v4i16 
(int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))))), (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; - def : Pat<(v2i32 (int_arm_neon_vqsubs + def : Pat<(v2i32 (ssubsat (v2i32 DPR:$src1), (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))))), (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; - def : Pat<(v8i16 (int_arm_neon_vqsubs + def : Pat<(v8i16 (ssubsat (v8i16 QPR:$src1), (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))))), (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; - def : Pat<(v4i32 (int_arm_neon_vqsubs + def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1), (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))))), @@ -4621,14 +4621,14 @@ let Predicates = [HasNEON, HasV8_1a] in { defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", null_frag>; - def : Pat<(v4i16 (int_arm_neon_vqsubs + def : Pat<(v4i16 (ssubsat (v4i16 DPR:$src1), (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm), imm:$lane)))))), (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>; - def : Pat<(v2i32 (int_arm_neon_vqsubs + def : Pat<(v2i32 (ssubsat (v2i32 DPR:$src1), (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), @@ -4636,7 +4636,7 @@ let Predicates = [HasNEON, HasV8_1a] in { imm:$lane)))))), (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane))>; - def : Pat<(v8i16 (int_arm_neon_vqsubs + def : Pat<(v8i16 (ssubsat (v8i16 QPR:$src1), (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src2), @@ -4648,7 +4648,7 @@ let Predicates = [HasNEON, HasV8_1a] in { QPR:$src3, (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane)))>; - def : Pat<(v4i32 (int_arm_neon_vqsubs + def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1), (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src2), @@ -4667,20 +4667,20 @@ defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>; 
let Predicates = [HasNEON] in { -def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), +def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1), (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))))), (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; -def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), +def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1), (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))))), (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; -def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), +def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1), (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm), imm:$lane)))))), (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; -def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), +def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1), (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm), imm:$lane)))))), @@ -4759,20 +4759,20 @@ defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>; let Predicates = [HasNEON] in { -def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), +def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1), (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))))), (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; -def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), +def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1), (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))))), (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; -def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), +def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1), (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm), imm:$lane)))))), (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; -def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), +def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1), (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), (v2i32 
(ARMvduplane (v2i32 DPR_VFP2:$Vm), imm:$lane)))))), @@ -5045,10 +5045,10 @@ defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm, // VQSUB : Vector Saturing Subtract defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vqsub", "s", int_arm_neon_vqsubs, 0>; + "vqsub", "s", ssubsat, 0>; defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vqsub", "u", int_arm_neon_vqsubu, 0>; + "vqsub", "u", usubsat, 0>; // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>; // VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q) diff --git a/llvm/test/CodeGen/ARM/addsubo-legalization.ll b/llvm/test/CodeGen/ARM/addsubo-legalization.ll index e9143d814d3de..e3a48ed0c14f1 100644 --- a/llvm/test/CodeGen/ARM/addsubo-legalization.ll +++ b/llvm/test/CodeGen/ARM/addsubo-legalization.ll @@ -95,48 +95,19 @@ define <2 x i1> @usubo(<2 x i64> *%ptr, <2 x i64> *%ptr2) { define <2 x i1> @saddo(<2 x i64> *%ptr, <2 x i64> *%ptr2) { ; CHECK-LABEL: saddo: ; CHECK: @ %bb.0: -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: vld1.64 {d20, d21}, [r0] -; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: vld1.64 {d18, d19}, [r1] -; CHECK-NEXT: vadd.i64 q8, q10, q9 -; CHECK-NEXT: vmov.32 r2, d20[0] -; CHECK-NEXT: vmov.32 r1, d20[1] -; CHECK-NEXT: vmov.32 r12, d16[0] -; CHECK-NEXT: vmov.32 r8, d16[1] -; CHECK-NEXT: vmov.32 lr, d17[0] -; CHECK-NEXT: vmov.32 r4, d21[0] -; CHECK-NEXT: vmov.32 r5, d17[1] -; CHECK-NEXT: vmov.32 r6, d18[1] -; CHECK-NEXT: vmov.32 r7, d21[1] -; CHECK-NEXT: subs.w r2, r12, r2 -; CHECK-NEXT: vmov.32 r2, d19[1] -; CHECK-NEXT: sbcs.w r1, r8, r1 -; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #1 -; CHECK-NEXT: subs.w r4, lr, r4 -; CHECK-NEXT: sbcs.w r7, r5, r7 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r3, #1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: it 
ne -; CHECK-NEXT: movne.w r3, #-1 -; CHECK-NEXT: asrs r7, r6, #31 -; CHECK-NEXT: vdup.32 d21, r3 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r1, #-1 -; CHECK-NEXT: vdup.32 d20, r1 +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vqadd.s64 q10, q9, q8 +; CHECK-NEXT: vadd.i64 q8, q9, q8 +; CHECK-NEXT: vceq.i32 q9, q8, q10 ; CHECK-NEXT: vst1.64 {d16, d17}, [r0] -; CHECK-NEXT: asrs r2, r2, #31 -; CHECK-NEXT: vdup.32 d19, r2 -; CHECK-NEXT: vdup.32 d18, r7 -; CHECK-NEXT: veor q9, q9, q10 +; CHECK-NEXT: vrev64.32 q10, q9 +; CHECK-NEXT: vand q9, q9, q10 +; CHECK-NEXT: vmvn q9, q9 ; CHECK-NEXT: vmovn.i64 d18, q9 ; CHECK-NEXT: vmov r2, r1, d18 ; CHECK-NEXT: mov r0, r2 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: bx lr %x = load <2 x i64>, <2 x i64>* %ptr, align 8 %y = load <2 x i64>, <2 x i64>* %ptr2, align 8 %s = call {<2 x i64>, <2 x i1>} @llvm.sadd.with.overflow.v2i64(<2 x i64> %x, <2 x i64> %y) @@ -149,64 +120,19 @@ define <2 x i1> @saddo(<2 x i64> *%ptr, <2 x i64> *%ptr2) { define <2 x i1> @ssubo(<2 x i64> *%ptr, <2 x i64> *%ptr2) { ; CHECK-LABEL: ssubo: ; CHECK: @ %bb.0: -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: vld1.64 {d18, d19}, [r1] -; CHECK-NEXT: movs r2, #0 -; CHECK-NEXT: vld1.64 {d20, d21}, [r0] -; CHECK-NEXT: vsub.i64 q8, q10, q9 -; CHECK-NEXT: vmov.32 r1, d20[0] -; CHECK-NEXT: vmov.32 r12, d20[1] -; CHECK-NEXT: vmov.32 r3, d16[0] -; CHECK-NEXT: vmov.32 lr, d16[1] -; CHECK-NEXT: vmov.32 r4, d21[0] -; CHECK-NEXT: vmov.32 r5, d17[0] -; CHECK-NEXT: vmov.32 r6, d21[1] -; CHECK-NEXT: vmov.32 r7, d17[1] -; CHECK-NEXT: vmov.32 r8, d18[1] -; CHECK-NEXT: subs r1, r3, r1 -; CHECK-NEXT: vmov.32 r3, d18[0] -; CHECK-NEXT: sbcs.w r1, lr, r12 -; CHECK-NEXT: vmov.32 r12, d19[0] -; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #1 -; CHECK-NEXT: subs r5, r5, r4 -; CHECK-NEXT: vmov.32 r5, d19[1] -; CHECK-NEXT: sbcs r7, r6 -; CHECK-NEXT: mov.w r7, #0 
-; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r7, #1 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r7, #-1 -; CHECK-NEXT: vdup.32 d21, r7 -; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: sbcs.w r3, r2, r8 -; CHECK-NEXT: mov.w r3, #0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r3, #1 -; CHECK-NEXT: rsbs.w r6, r12, #0 -; CHECK-NEXT: sbcs.w r6, r2, r5 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r2, #-1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: vdup.32 d19, r2 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r3, #-1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r1, #-1 -; CHECK-NEXT: vdup.32 d18, r3 -; CHECK-NEXT: vdup.32 d20, r1 -; CHECK-NEXT: veor q9, q9, q10 +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vqsub.s64 q10, q9, q8 +; CHECK-NEXT: vsub.i64 q8, q9, q8 +; CHECK-NEXT: vceq.i32 q9, q8, q10 ; CHECK-NEXT: vst1.64 {d16, d17}, [r0] +; CHECK-NEXT: vrev64.32 q10, q9 +; CHECK-NEXT: vand q9, q9, q10 +; CHECK-NEXT: vmvn q9, q9 ; CHECK-NEXT: vmovn.i64 d18, q9 ; CHECK-NEXT: vmov r2, r1, d18 ; CHECK-NEXT: mov r0, r2 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: bx lr %x = load <2 x i64>, <2 x i64>* %ptr, align 8 %y = load <2 x i64>, <2 x i64>* %ptr2, align 8 %s = call {<2 x i64>, <2 x i1>} @llvm.ssub.with.overflow.v2i64(<2 x i64> %x, <2 x i64> %y) diff --git a/llvm/test/CodeGen/ARM/neon-v8.1a.ll b/llvm/test/CodeGen/ARM/neon-v8.1a.ll index 91259139d4463..95d2085800810 100644 --- a/llvm/test/CodeGen/ARM/neon-v8.1a.ll +++ b/llvm/test/CodeGen/ARM/neon-v8.1a.ll @@ -8,20 +8,20 @@ declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>) declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16>, <4 x i16>) -declare <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16>, 
<8 x i16>) -declare <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32>, <2 x i32>) -declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) +declare <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>) -declare <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16>, <4 x i16>) -declare <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16>, <8 x i16>) -declare <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32>, <2 x i32>) -declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) +declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) define <4 x i16> @test_vqrdmlah_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) { ; CHECK-LABEL: test_vqrdmlah_v4i16: %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs) - %retval = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %acc, <4 x i16> %prod) + %retval = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %acc, <4 x i16> %prod) ; CHECK: vqrdmlah.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <4 x i16> %retval } @@ -29,7 +29,7 @@ define <4 x i16> @test_vqrdmlah_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> define <8 x i16> @test_vqrdmlah_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) { ; CHECK-LABEL: test_vqrdmlah_v8i16: %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs) - %retval = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %acc, <8 x i16> %prod) + %retval = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %acc, <8 x i16> %prod) ; CHECK: vqrdmlah.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} ret <8 x i16> %retval } @@ -37,7 +37,7 @@ define <8 x i16> 
@test_vqrdmlah_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> define <2 x i32> @test_vqrdmlah_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) { ; CHECK-LABEL: test_vqrdmlah_v2i32: %prod = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs) - %retval = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %acc, <2 x i32> %prod) + %retval = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %acc, <2 x i32> %prod) ; CHECK: vqrdmlah.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <2 x i32> %retval } @@ -45,7 +45,7 @@ define <2 x i32> @test_vqrdmlah_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> define <4 x i32> @test_vqrdmlah_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) { ; CHECK-LABEL: test_vqrdmlah_v4i32: %prod = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs) - %retval = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %acc, <4 x i32> %prod) + %retval = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %acc, <4 x i32> %prod) ; CHECK: vqrdmlah.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} ret <4 x i32> %retval } @@ -53,7 +53,7 @@ define <4 x i32> @test_vqrdmlah_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> define <4 x i16> @test_vqrdmlsh_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) { ; CHECK-LABEL: test_vqrdmlsh_v4i16: %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs) - %retval = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %acc, <4 x i16> %prod) + %retval = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %acc, <4 x i16> %prod) ; CHECK: vqrdmlsh.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <4 x i16> %retval } @@ -61,7 +61,7 @@ define <4 x i16> @test_vqrdmlsh_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> define <8 x i16> @test_vqrdmlsh_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) { ; CHECK-LABEL: test_vqrdmlsh_v8i16: %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs) - %retval = call <8 x i16> 
@llvm.arm.neon.vqsubs.v8i16(<8 x i16> %acc, <8 x i16> %prod) + %retval = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %acc, <8 x i16> %prod) ; CHECK: vqrdmlsh.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} ret <8 x i16> %retval } @@ -69,7 +69,7 @@ define <8 x i16> @test_vqrdmlsh_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> define <2 x i32> @test_vqrdmlsh_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) { ; CHECK-LABEL: test_vqrdmlsh_v2i32: %prod = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs) - %retval = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %acc, <2 x i32> %prod) + %retval = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %acc, <2 x i32> %prod) ; CHECK: vqrdmlsh.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ret <2 x i32> %retval } @@ -77,7 +77,7 @@ define <2 x i32> @test_vqrdmlsh_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> define <4 x i32> @test_vqrdmlsh_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) { ; CHECK-LABEL: test_vqrdmlsh_v4i32: %prod = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs) - %retval = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %acc, <4 x i32> %prod) + %retval = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %acc, <4 x i32> %prod) ; CHECK: vqrdmlsh.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} ret <4 x i32> %retval } @@ -90,7 +90,7 @@ define <4 x i16> @test_vqrdmlah_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle) - %retval = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %acc, <4 x i16> %prod) + %retval = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %acc, <4 x i16> %prod) ; CHECK: vqrdmlah.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[3] ret <4 x i16> %retval } @@ -100,7 +100,7 @@ define <8 x i16> @test_vqrdmlahq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <4 x i16 entry: %shuffle = shufflevector <4 x 
i16> %v, <4 x i16> undef, <8 x i32> %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle) - %retval = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %acc, <8 x i16> %prod) + %retval = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %acc, <8 x i16> %prod) ; CHECK: vqrdmlah.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[2] ret <8 x i16> %retval } @@ -110,7 +110,7 @@ define <2 x i32> @test_vqrdmlah_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %prod = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle) - %retval = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %acc, <2 x i32> %prod) + %retval = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %acc, <2 x i32> %prod) ; CHECK: vqrdmlah.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[1] ret <2 x i32> %retval } @@ -120,7 +120,7 @@ define <4 x i32> @test_vqrdmlahq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <2 x i32> entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer %prod = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle) - %retval = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %acc, <4 x i32> %prod) + %retval = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %acc, <4 x i32> %prod) ; CHECK: vqrdmlah.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[0] ret <4 x i32> %retval } @@ -130,7 +130,7 @@ define <4 x i16> @test_vqrdmlsh_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle) - %retval = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %acc, <4 x i16> %prod) + %retval = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %acc, <4 x i16> %prod) ; CHECK: vqrdmlsh.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[3] ret <4 x i16> %retval } @@ -140,7 +140,7 @@ define <8 x i16> 
@test_vqrdmlshq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <4 x i16 entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle) - %retval = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %acc, <8 x i16> %prod) + %retval = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %acc, <8 x i16> %prod) ; CHECK: vqrdmlsh.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[2] ret <8 x i16> %retval } @@ -150,7 +150,7 @@ define <2 x i32> @test_vqrdmlsh_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %prod = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle) - %retval = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %acc, <2 x i32> %prod) + %retval = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %acc, <2 x i32> %prod) ; CHECK: vqrdmlsh.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[1] ret <2 x i32> %retval } @@ -160,7 +160,7 @@ define <4 x i32> @test_vqrdmlshq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <2 x i32> entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer %prod = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle) - %retval = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %acc, <4 x i32> %prod) + %retval = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %acc, <4 x i32> %prod) ; CHECK: vqrdmlsh.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[0] ret <4 x i32> %retval } diff --git a/llvm/test/CodeGen/ARM/neon-vqaddsub-upgrade.ll b/llvm/test/CodeGen/ARM/neon-vqaddsub-upgrade.ll new file mode 100644 index 0000000000000..a1323810151a5 --- /dev/null +++ b/llvm/test/CodeGen/ARM/neon-vqaddsub-upgrade.ll @@ -0,0 +1,330 @@ +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s + +define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK-LABEL: vqadds8: +;CHECK: vqadd.s8 + %tmp1 = load <8 x i8>, <8 x i8>* 
%A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vqadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK-LABEL: vqadds16: +;CHECK: vqadd.s16 + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vqadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK-LABEL: vqadds32: +;CHECK: vqadd.s32 + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vqadds64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK-LABEL: vqadds64: +;CHECK: vqadd.s64 + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <8 x i8> @vqaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK-LABEL: vqaddu8: +;CHECK: vqadd.u8 + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vqaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK-LABEL: vqaddu16: +;CHECK: vqadd.u16 + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vqaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK-LABEL: vqaddu32: +;CHECK: vqadd.u32 + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 x i64> 
@vqaddu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK-LABEL: vqaddu64: +;CHECK: vqadd.u64 + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <16 x i8> @vqaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK-LABEL: vqaddQs8: +;CHECK: vqadd.s8 + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vqaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK-LABEL: vqaddQs16: +;CHECK: vqadd.s16 + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vqaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK-LABEL: vqaddQs32: +;CHECK: vqadd.s32 + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vqaddQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK-LABEL: vqaddQs64: +;CHECK: vqadd.s64 + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +define <16 x i8> @vqaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK-LABEL: vqaddQu8: +;CHECK: vqadd.u8 + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vqaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK-LABEL: vqaddQu16: +;CHECK: vqadd.u16 + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load 
<8 x i16>, <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vqaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK-LABEL: vqaddQu32: +;CHECK: vqadd.u32 + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vqaddQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK-LABEL: vqaddQu64: +;CHECK: vqadd.u64 + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + + +define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK-LABEL: vqsubs8: +;CHECK: vqsub.s8 + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vqsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK-LABEL: vqsubs16: +;CHECK: vqsub.s16 + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vqsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK-LABEL: vqsubs32: +;CHECK: vqsub.s32 + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vqsubs64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK-LABEL: vqsubs64: +;CHECK: vqsub.s64 + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <8 x i8> 
@vqsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK-LABEL: vqsubu8: +;CHECK: vqsub.u8 + %tmp1 = load <8 x i8>, <8 x i8>* %A + %tmp2 = load <8 x i8>, <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vqsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK-LABEL: vqsubu16: +;CHECK: vqsub.u16 + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vqsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK-LABEL: vqsubu32: +;CHECK: vqsub.u32 + %tmp1 = load <2 x i32>, <2 x i32>* %A + %tmp2 = load <2 x i32>, <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vqsubu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK-LABEL: vqsubu64: +;CHECK: vqsub.u64 + %tmp1 = load <1 x i64>, <1 x i64>* %A + %tmp2 = load <1 x i64>, <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <16 x i8> @vqsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK-LABEL: vqsubQs8: +;CHECK: vqsub.s8 + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vqsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK-LABEL: vqsubQs16: +;CHECK: vqsub.s16 + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vqsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK-LABEL: vqsubQs32: +;CHECK: vqsub.s32 + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x 
i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vqsubQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK-LABEL: vqsubQs64: +;CHECK: vqsub.s64 + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +define <16 x i8> @vqsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK-LABEL: vqsubQu8: +;CHECK: vqsub.u8 + %tmp1 = load <16 x i8>, <16 x i8>* %A + %tmp2 = load <16 x i8>, <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vqsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK-LABEL: vqsubQu16: +;CHECK: vqsub.u16 + %tmp1 = load <8 x i16>, <8 x i16>* %A + %tmp2 = load <8 x i16>, <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vqsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK-LABEL: vqsubQu32: +;CHECK: vqsub.u32 + %tmp1 = load <4 x i32>, <4 x i32>* %A + %tmp2 = load <4 x i32>, <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vqsubQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK-LABEL: vqsubQu64: +;CHECK: vqsub.u64 + %tmp1 = load <2 x i64>, <2 x i64>* %A + %tmp2 = load <2 x i64>, <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +declare <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>) nounwind 
readnone + +declare <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) 
nounwind readnone +declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone diff --git a/llvm/test/CodeGen/ARM/vmul.ll b/llvm/test/CodeGen/ARM/vmul.ll index fcffe175e2bac..e8cf8d9b27b6f 100644 --- a/llvm/test/CodeGen/ARM/vmul.ll +++ b/llvm/test/CodeGen/ARM/vmul.ll @@ -574,7 +574,7 @@ for.body33: ; preds = %for.body33, %for.bo %vmovl.i225 = zext <8 x i8> undef to <8 x i16> %mul.i223 = mul <8 x i16> %vmovl.i249, %vmovl.i249 %vshl_n = shl <8 x i16> %mul.i223, - %vqsub2.i216 = tail call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> , <8 x i16> %vshl_n) nounwind + %vqsub2.i216 = tail call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> , <8 x i16> %vshl_n) nounwind %mul.i209 = mul <8 x i16> undef, %vshr_n130 = lshr <8 x i16> undef, %vshr_n134 = lshr <8 x i16> %mul.i209, @@ -608,7 +608,7 @@ for.end179: ; preds = %for.cond.loopexit, } declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone ; vmull lowering would create a zext(v4i8 load()) instead of a zextload(v4i8), diff --git a/llvm/test/CodeGen/ARM/vqadd.ll b/llvm/test/CodeGen/ARM/vqadd.ll index d1e90cb209449..47432c7b732d6 100644 --- a/llvm/test/CodeGen/ARM/vqadd.ll +++ b/llvm/test/CodeGen/ARM/vqadd.ll @@ -5,7 +5,7 @@ define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vqadd.s8 %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B - %tmp3 = call 
<8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -14,7 +14,7 @@ define <4 x i16> @vqadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vqadd.s16 %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -23,7 +23,7 @@ define <2 x i32> @vqadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vqadd.s32 %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -32,7 +32,7 @@ define <1 x i64> @vqadds64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK: vqadd.s64 %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = load <1 x i64>, <1 x i64>* %B - %tmp3 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + %tmp3 = call <1 x i64> @llvm.sadd.sat.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ret <1 x i64> %tmp3 } @@ -41,7 +41,7 @@ define <8 x i8> @vqaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vqadd.u8 %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -50,7 +50,7 @@ define <4 x i16> @vqaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vqadd.u16 %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ 
-59,7 +59,7 @@ define <2 x i32> @vqaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vqadd.u32 %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -68,7 +68,7 @@ define <1 x i64> @vqaddu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK: vqadd.u64 %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = load <1 x i64>, <1 x i64>* %B - %tmp3 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + %tmp3 = call <1 x i64> @llvm.uadd.sat.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ret <1 x i64> %tmp3 } @@ -77,7 +77,7 @@ define <16 x i8> @vqaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: vqadd.s8 %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -86,7 +86,7 @@ define <8 x i16> @vqaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: vqadd.s16 %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -95,7 +95,7 @@ define <4 x i32> @vqaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: vqadd.s32 %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -104,7 +104,7 @@ define <2 x i64> @vqaddQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: vqadd.s64 %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = 
load <2 x i64>, <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } @@ -113,7 +113,7 @@ define <16 x i8> @vqaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: vqadd.u8 %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -122,7 +122,7 @@ define <8 x i16> @vqaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: vqadd.u16 %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -131,7 +131,7 @@ define <4 x i32> @vqaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: vqadd.u32 %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -140,26 +140,26 @@ define <2 x i64> @vqaddQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: vqadd.u64 %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i64>, <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } -declare <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> 
@llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.sadd.sat.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.uadd.sat.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64>, <2 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> 
@llvm.arm.neon.vqaddu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64>, <2 x i64>) nounwind readnone diff --git a/llvm/test/CodeGen/ARM/vqdmul.ll b/llvm/test/CodeGen/ARM/vqdmul.ll index 6da080012a1e6..fa938d45becfb 100644 --- a/llvm/test/CodeGen/ARM/vqdmul.ll +++ b/llvm/test/CodeGen/ARM/vqdmul.ll @@ -204,7 +204,7 @@ define <4 x i32> @vqdmlals16_natural(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = load <4 x i16>, <4 x i16>* %C %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp2, <4 x i16> %tmp3) - %tmp5 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp4) + %tmp5 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp4) ret <4 x i32> %tmp5 } @@ -215,7 +215,7 @@ define <2 x i64> @vqdmlals32_natural(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = load <2 x i32>, <2 x i32>* %C %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp2, <2 x i32> %tmp3) - %tmp5 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp4) + %tmp5 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp4) ret <2 x i64> %tmp5 } @@ -225,7 +225,7 @@ entry: ; CHECK: vqdmlal.s16 q0, d2, d3[1] %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> ; <<4 x i16>> [#uses=1] %1 = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %arg1_int16x4_t, <4 x i16> %0) - %2 = tail call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %1) + %2 = tail call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %1) ret <4 x i32> %2 } @@ -235,12 +235,12 @@ entry: ; CHECK: vqdmlal.s32 
q0, d2, d3[1] %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> ; <<2 x i32>> [#uses=1] %1 = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %arg1_int32x2_t, <2 x i32> %0) - %2 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i64> %1) + %2 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i64> %1) ret <2 x i64> %2 } -declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64>, <2 x i64>) nounwind readnone define <4 x i32> @vqdmlsls16_natural(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ;CHECK-LABEL: vqdmlsls16_natural: @@ -249,7 +249,7 @@ define <4 x i32> @vqdmlsls16_natural(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = load <4 x i16>, <4 x i16>* %C %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp2, <4 x i16> %tmp3) - %tmp5 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp4) + %tmp5 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp4) ret <4 x i32> %tmp5 } @@ -260,7 +260,7 @@ define <2 x i64> @vqdmlsls32_natural(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = load <2 x i32>, <2 x i32>* %C %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp2, <2 x i32> %tmp3) - %tmp5 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp4) + %tmp5 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp4) ret <2 x i64> %tmp5 } @@ -270,7 +270,7 @@ entry: ; CHECK: vqdmlsl.s16 q0, d2, d3[1] %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> ; <<4 x i16>> [#uses=1] %1 = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> 
%arg1_int16x4_t, <4 x i16> %0) - %2 = tail call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %1) + %2 = tail call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %1) ret <4 x i32> %2 } @@ -280,9 +280,9 @@ entry: ; CHECK: vqdmlsl.s32 q0, d2, d3[1] %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> ; <<2 x i32>> [#uses=1] %1 = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %arg1_int32x2_t, <2 x i32> %0) - %2 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i64> %1) + %2 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i64> %1) ret <2 x i64> %2 } -declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64>, <2 x i64>) nounwind readnone diff --git a/llvm/test/CodeGen/ARM/vqsub.ll b/llvm/test/CodeGen/ARM/vqsub.ll index 40963ce824864..9864f6421cb3d 100644 --- a/llvm/test/CodeGen/ARM/vqsub.ll +++ b/llvm/test/CodeGen/ARM/vqsub.ll @@ -5,7 +5,7 @@ define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vqsub.s8 %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -14,7 +14,7 @@ define <4 x i16> @vqsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vqsub.s16 %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -23,7 +23,7 @@ define <2 x i32> @vqsubs32(<2 x i32>* %A, <2 x i32>* 
%B) nounwind { ;CHECK: vqsub.s32 %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -32,7 +32,7 @@ define <1 x i64> @vqsubs64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK: vqsub.s64 %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = load <1 x i64>, <1 x i64>* %B - %tmp3 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + %tmp3 = call <1 x i64> @llvm.ssub.sat.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ret <1 x i64> %tmp3 } @@ -41,7 +41,7 @@ define <8 x i8> @vqsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vqsub.u8 %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B - %tmp3 = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + %tmp3 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ret <8 x i8> %tmp3 } @@ -50,7 +50,7 @@ define <4 x i16> @vqsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vqsub.u16 %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B - %tmp3 = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + %tmp3 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ret <4 x i16> %tmp3 } @@ -59,7 +59,7 @@ define <2 x i32> @vqsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vqsub.u32 %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B - %tmp3 = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + %tmp3 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ret <2 x i32> %tmp3 } @@ -68,7 +68,7 @@ define <1 x i64> @vqsubu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { ;CHECK: vqsub.u64 %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = load <1 x i64>, <1 x i64>* %B - %tmp3 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> 
%tmp1, <1 x i64> %tmp2) + %tmp3 = call <1 x i64> @llvm.usub.sat.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ret <1 x i64> %tmp3 } @@ -77,7 +77,7 @@ define <16 x i8> @vqsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: vqsub.s8 %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -86,7 +86,7 @@ define <8 x i16> @vqsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: vqsub.s16 %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -95,7 +95,7 @@ define <4 x i32> @vqsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: vqsub.s32 %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -104,7 +104,7 @@ define <2 x i64> @vqsubQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: vqsub.s64 %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i64>, <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } @@ -113,7 +113,7 @@ define <16 x i8> @vqsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK: vqsub.u8 %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B - %tmp3 = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + %tmp3 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ret <16 x i8> %tmp3 } @@ -122,7 +122,7 @@ 
define <8 x i16> @vqsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: vqsub.u16 %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B - %tmp3 = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + %tmp3 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ret <8 x i16> %tmp3 } @@ -131,7 +131,7 @@ define <4 x i32> @vqsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK: vqsub.u32 %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B - %tmp3 = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + %tmp3 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ret <4 x i32> %tmp3 } @@ -140,26 +140,26 @@ define <2 x i64> @vqsubQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ;CHECK: vqsub.u64 %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i64>, <2 x i64>* %B - %tmp3 = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + %tmp3 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ret <2 x i64> %tmp3 } -declare <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.ssub.sat.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone -declare <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone -declare <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32>, <2 x i32>) 
nounwind readnone -declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone +declare <8 x i8> @llvm.usub.sat.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.usub.sat.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.usub.sat.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.usub.sat.v1i64(<1 x i64>, <1 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64>, <2 x i64>) nounwind readnone -declare <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone -declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone -declare <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone -declare <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone +declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.usub.sat.v2i64(<2 x i64>, <2 x i64>) nounwind readnone From cbfa237892e55b7129a1178c9b03f26683d643af Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Wed, 27 Nov 2019 17:02:01 +0300 Subject: [PATCH 109/591] Revert "[clang][CodeGen] Implicit Conversion Sanitizer: handle increment/decrement 
(PR44054)" The asssertion that was added does not hold, breaks on test-suite/MultiSource/Applications/SPASS/analyze.c Will reduce the testcase and revisit. This reverts commit 9872ea4ed1de4c49300430e4f1f4dfc110a79ab9, 870f3542d3e0d06d208442bdca6482866b59171b. --- clang/docs/ReleaseNotes.rst | 4 - clang/lib/CodeGen/CGExprScalar.cpp | 36 +- ...catch-implicit-conversions-incdec-basics.c | 139 -------- ...er-arithmetic-value-change-incdec-basics.c | 139 -------- ...plicit-integer-conversions-incdec-basics.c | 139 -------- ...licit-integer-sign-changes-incdec-basics.c | 139 -------- ...tch-implicit-integer-sign-changes-incdec.c | 307 ------------------ ...plicit-integer-truncations-incdec-basics.c | 139 -------- ...signed-integer-truncations-incdec-basics.c | 139 -------- ...plicit-signed-integer-truncations-incdec.c | 303 ----------------- ...signed-integer-truncations-incdec-basics.c | 101 ------ .../integer-conversion-incdec.c | 122 ------- .../integer-sign-change-incdec.c | 120 ------- .../signed-integer-truncation-incdec.c | 122 ------- 14 files changed, 3 insertions(+), 1946 deletions(-) delete mode 100644 clang/test/CodeGen/catch-implicit-conversions-incdec-basics.c delete mode 100644 clang/test/CodeGen/catch-implicit-integer-arithmetic-value-change-incdec-basics.c delete mode 100644 clang/test/CodeGen/catch-implicit-integer-conversions-incdec-basics.c delete mode 100644 clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec-basics.c delete mode 100644 clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec.c delete mode 100644 clang/test/CodeGen/catch-implicit-integer-truncations-incdec-basics.c delete mode 100644 clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec-basics.c delete mode 100644 clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec.c delete mode 100644 clang/test/CodeGen/catch-implicit-unsigned-integer-truncations-incdec-basics.c delete mode 100644 
compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-conversion-incdec.c delete mode 100644 compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-sign-change-incdec.c delete mode 100644 compiler-rt/test/ubsan/TestCases/ImplicitConversion/signed-integer-truncation-incdec.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 37a8f30e0bc9c..4ac300deb589a 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -78,10 +78,6 @@ Non-comprehensive list of changes in this release been extended to detect these cases, so that code relying on them can be detected and fixed. -* The Implicit Conversion Sanitizer (``-fsanitize=implicit-conversion``) has - learned to sanitize pre/post increment/decrement of types with bit width - smaller than ``int``. - - For X86 target, -march=skylake-avx512, -march=icelake-client, -march=icelake-server, -march=cascadelake, -march=cooperlake will default to not using 512-bit zmm registers in vectorized code unless 512-bit intrinsics diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 953ced9168c5b..8229766406434 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -2419,39 +2419,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, // Most common case by far: integer increment. 
} else if (type->isIntegerType()) { - assert((!type->isPromotableIntegerType() || - (type->isSignedIntegerOrEnumerationType() || - CGF.getContext() - .getPromotedIntegerType(type) - ->isSignedIntegerOrEnumerationType())) && - "The following check expects that if we do promotion, at least one " - "of the types (either base or promoted) will be signed."); - if (CGF.SanOpts.hasOneOf( - SanitizerKind::ImplicitIntegerArithmeticValueChange) && - type->isPromotableIntegerType()) { - // While `x += 1` (for `x` with width less than int) is modeled as - // promotion+arithmetics+demotion, and we can catch lossy demotion with - // ease; inc/dec with width less than int can't overflow because of - // promotion rules, so we omit promotion+demotion, which means that we can - // not catch lossy "demotion". Because we still want to catch these cases - // when the sanitizer is enabled, we perform the promotion, then perform - // the increment/decrement in the wider type, and finally - // perform the demotion. This will catch lossy demotions. - - QualType promotedType = CGF.getContext().getPromotedIntegerType(type); - assert(promotedType != type && "Shouldn't promote to the same type."); - value = EmitScalarConversion(value, type, promotedType, E->getExprLoc()); - Value *amt = llvm::ConstantInt::get(value->getType(), amount, true); - value = Builder.CreateAdd(value, amt, isInc ? "inc" : "dec"); - // Do pass non-default ScalarConversionOpts so that sanitizer check is - // emitted. - value = EmitScalarConversion(value, promotedType, type, E->getExprLoc(), - ScalarConversionOpts(CGF.SanOpts)); - - // Note that signed integer inc/dec with width less than int can't - // overflow because of promotion rules; we're just eliding a few steps - // here. - } else if (E->canOverflow() && type->isSignedIntegerOrEnumerationType()) { + // Note that signed integer inc/dec with width less than int can't + // overflow because of promotion rules; we're just eliding a few steps here. 
+ if (E->canOverflow() && type->isSignedIntegerOrEnumerationType()) { value = EmitIncDecConsiderOverflowBehavior(E, value, isInc); } else if (E->canOverflow() && type->isUnsignedIntegerType() && CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) { diff --git a/clang/test/CodeGen/catch-implicit-conversions-incdec-basics.c b/clang/test/CodeGen/catch-implicit-conversions-incdec-basics.c deleted file mode 100644 index e97a72cb0a339..0000000000000 --- a/clang/test/CodeGen/catch-implicit-conversions-incdec-basics.c +++ /dev/null @@ -1,139 +0,0 @@ -// RUN: %clang_cc1 -fsanitize=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change -fsanitize-recover=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK - -// CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } -// CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } -// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } -// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, {{.*}}* @[[INT]], {{.*}}* 
@[[SHORT]], i8 2 } -// CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } -// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } -// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } - -// CHECK-LABEL: @t0( -unsigned short t0(unsigned short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*) -#line 100 - x++; - return x; -} -// CHECK-LABEL: @t1( -unsigned short t1(unsigned short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*) -#line 200 - x--; - return x; -} -// CHECK-LABEL: @t2( -unsigned short t2(unsigned short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*) -#line 300 - ++x; - return x; -} -// CHECK-LABEL: @t3( -unsigned short t3(unsigned short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*) -#line 400 - --x; - return x; -} - -// CHECK-LABEL: @t4( 
-signed short t4(signed short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*) -#line 500 - x++; - return x; -} -// CHECK-LABEL: @t5( -signed short t5(signed short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*) -#line 600 - x--; - return x; -} -// CHECK-LABEL: @t6( -signed short t6(signed short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*) -#line 700 - ++x; - return x; -} -// CHECK-LABEL: @t7( -signed short t7(signed short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*) -#line 800 - --x; - return x; -} - -// CHECK-LABEL: @t8( -unsigned char t8(unsigned char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_900]] to i8*) -#line 900 - x++; - return x; -} -// CHECK-LABEL: @t9( -unsigned char t9(unsigned char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1000]] to i8*) -#line 1000 - x--; - return x; -} -// CHECK-LABEL: @t10( -unsigned char t10(unsigned char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1100]] to i8*) -#line 1100 - ++x; - return x; -} -// CHECK-LABEL: @t11( -unsigned char t11(unsigned char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1200]] to i8*) -#line 1200 - --x; - return x; -} - -// CHECK-LABEL: @t12( -signed char t12(signed char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1300]] to i8*) -#line 1300 - x++; - 
return x; -} -// CHECK-LABEL: @t13( -signed char t13(signed char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1400]] to i8*) -#line 1400 - x--; - return x; -} -// CHECK-LABEL: @t14( -signed char t14(signed char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1500]] to i8*) -#line 1500 - ++x; - return x; -} -// CHECK-LABEL: @t15( -signed char t15(signed char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1600]] to i8*) -#line 1600 - --x; - return x; -} diff --git a/clang/test/CodeGen/catch-implicit-integer-arithmetic-value-change-incdec-basics.c b/clang/test/CodeGen/catch-implicit-integer-arithmetic-value-change-incdec-basics.c deleted file mode 100644 index 5e0aa1108dfc9..0000000000000 --- a/clang/test/CodeGen/catch-implicit-integer-arithmetic-value-change-incdec-basics.c +++ /dev/null @@ -1,139 +0,0 @@ -// RUN: %clang_cc1 -fsanitize=implicit-signed-integer-truncation,implicit-integer-sign-change -fsanitize-recover=implicit-signed-integer-truncation,implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK - -// CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } -// CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } -// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[SHORT:.*]] = {{.*}} 
c"'short'\00" } -// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } -// CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } -// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } -// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } - -// CHECK-LABEL: @t0( -unsigned short t0(unsigned short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*) -#line 100 - x++; - return x; -} -// CHECK-LABEL: @t1( -unsigned short t1(unsigned short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*) -#line 200 - x--; - return x; -} -// CHECK-LABEL: @t2( -unsigned short t2(unsigned short x) { 
- // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*) -#line 300 - ++x; - return x; -} -// CHECK-LABEL: @t3( -unsigned short t3(unsigned short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*) -#line 400 - --x; - return x; -} - -// CHECK-LABEL: @t4( -signed short t4(signed short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*) -#line 500 - x++; - return x; -} -// CHECK-LABEL: @t5( -signed short t5(signed short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*) -#line 600 - x--; - return x; -} -// CHECK-LABEL: @t6( -signed short t6(signed short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*) -#line 700 - ++x; - return x; -} -// CHECK-LABEL: @t7( -signed short t7(signed short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*) -#line 800 - --x; - return x; -} - -// CHECK-LABEL: @t8( -unsigned char t8(unsigned char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_900]] to i8*) -#line 900 - x++; - return x; -} -// CHECK-LABEL: @t9( -unsigned char t9(unsigned char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1000]] to i8*) -#line 1000 - x--; - return x; -} -// CHECK-LABEL: @t10( -unsigned char t10(unsigned char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1100]] to i8*) -#line 1100 - ++x; - return x; -} -// CHECK-LABEL: @t11( 
-unsigned char t11(unsigned char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1200]] to i8*) -#line 1200 - --x; - return x; -} - -// CHECK-LABEL: @t12( -signed char t12(signed char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1300]] to i8*) -#line 1300 - x++; - return x; -} -// CHECK-LABEL: @t13( -signed char t13(signed char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1400]] to i8*) -#line 1400 - x--; - return x; -} -// CHECK-LABEL: @t14( -signed char t14(signed char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1500]] to i8*) -#line 1500 - ++x; - return x; -} -// CHECK-LABEL: @t15( -signed char t15(signed char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1600]] to i8*) -#line 1600 - --x; - return x; -} diff --git a/clang/test/CodeGen/catch-implicit-integer-conversions-incdec-basics.c b/clang/test/CodeGen/catch-implicit-integer-conversions-incdec-basics.c deleted file mode 100644 index e97a72cb0a339..0000000000000 --- a/clang/test/CodeGen/catch-implicit-integer-conversions-incdec-basics.c +++ /dev/null @@ -1,139 +0,0 @@ -// RUN: %clang_cc1 -fsanitize=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change -fsanitize-recover=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK - -// CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } -// CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } -// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, 
i32 100, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } -// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } -// CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } -// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } -// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } - -// CHECK-LABEL: @t0( -unsigned short t0(unsigned short x) { - 
// CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*) -#line 100 - x++; - return x; -} -// CHECK-LABEL: @t1( -unsigned short t1(unsigned short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*) -#line 200 - x--; - return x; -} -// CHECK-LABEL: @t2( -unsigned short t2(unsigned short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*) -#line 300 - ++x; - return x; -} -// CHECK-LABEL: @t3( -unsigned short t3(unsigned short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*) -#line 400 - --x; - return x; -} - -// CHECK-LABEL: @t4( -signed short t4(signed short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*) -#line 500 - x++; - return x; -} -// CHECK-LABEL: @t5( -signed short t5(signed short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*) -#line 600 - x--; - return x; -} -// CHECK-LABEL: @t6( -signed short t6(signed short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*) -#line 700 - ++x; - return x; -} -// CHECK-LABEL: @t7( -signed short t7(signed short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*) -#line 800 - --x; - return x; -} - -// CHECK-LABEL: @t8( -unsigned char t8(unsigned char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_900]] to i8*) -#line 900 - x++; - return x; -} -// CHECK-LABEL: @t9( -unsigned 
char t9(unsigned char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1000]] to i8*) -#line 1000 - x--; - return x; -} -// CHECK-LABEL: @t10( -unsigned char t10(unsigned char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1100]] to i8*) -#line 1100 - ++x; - return x; -} -// CHECK-LABEL: @t11( -unsigned char t11(unsigned char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1200]] to i8*) -#line 1200 - --x; - return x; -} - -// CHECK-LABEL: @t12( -signed char t12(signed char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1300]] to i8*) -#line 1300 - x++; - return x; -} -// CHECK-LABEL: @t13( -signed char t13(signed char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1400]] to i8*) -#line 1400 - x--; - return x; -} -// CHECK-LABEL: @t14( -signed char t14(signed char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1500]] to i8*) -#line 1500 - ++x; - return x; -} -// CHECK-LABEL: @t15( -signed char t15(signed char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1600]] to i8*) -#line 1600 - --x; - return x; -} diff --git a/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec-basics.c b/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec-basics.c deleted file mode 100644 index 93495b331b9f8..0000000000000 --- a/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec-basics.c +++ /dev/null @@ -1,139 +0,0 @@ -// RUN: %clang_cc1 -fsanitize=implicit-integer-sign-change -fsanitize-recover=implicit-integer-sign-change -emit-llvm 
%s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK - -// CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } -// CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } -// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } -// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } -// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } -// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } -// CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } -// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } -// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } -// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } -// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } -// CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } -// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } -// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 3 } -// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, {{.*}}* @[[INT]], {{.*}}* 
@[[SIGNED_CHAR]], i8 3 } -// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 3 } -// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 3 } - -// CHECK-LABEL: @t0( -unsigned short t0(unsigned short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*) -#line 100 - x++; - return x; -} -// CHECK-LABEL: @t1( -unsigned short t1(unsigned short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*) -#line 200 - x--; - return x; -} -// CHECK-LABEL: @t2( -unsigned short t2(unsigned short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*) -#line 300 - ++x; - return x; -} -// CHECK-LABEL: @t3( -unsigned short t3(unsigned short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*) -#line 400 - --x; - return x; -} - -// CHECK-LABEL: @t4( -signed short t4(signed short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*) -#line 500 - x++; - return x; -} -// CHECK-LABEL: @t5( -signed short t5(signed short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*) -#line 600 - x--; - return x; -} -// CHECK-LABEL: @t6( -signed short t6(signed short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*) -#line 700 - ++x; - return x; -} -// CHECK-LABEL: @t7( -signed short t7(signed short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* 
@[[LINE_800]] to i8*) -#line 800 - --x; - return x; -} - -// CHECK-LABEL: @t8( -unsigned char t8(unsigned char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_900]] to i8*) -#line 900 - x++; - return x; -} -// CHECK-LABEL: @t9( -unsigned char t9(unsigned char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1000]] to i8*) -#line 1000 - x--; - return x; -} -// CHECK-LABEL: @t10( -unsigned char t10(unsigned char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1100]] to i8*) -#line 1100 - ++x; - return x; -} -// CHECK-LABEL: @t11( -unsigned char t11(unsigned char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1200]] to i8*) -#line 1200 - --x; - return x; -} - -// CHECK-LABEL: @t12( -signed char t12(signed char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1300]] to i8*) -#line 1300 - x++; - return x; -} -// CHECK-LABEL: @t13( -signed char t13(signed char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1400]] to i8*) -#line 1400 - x--; - return x; -} -// CHECK-LABEL: @t14( -signed char t14(signed char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1500]] to i8*) -#line 1500 - ++x; - return x; -} -// CHECK-LABEL: @t15( -signed char t15(signed char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1600]] to i8*) -#line 1600 - --x; - return x; -} diff --git a/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec.c 
b/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec.c deleted file mode 100644 index 41e08ee32a525..0000000000000 --- a/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec.c +++ /dev/null @@ -1,307 +0,0 @@ -// RUN: %clang_cc1 -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NOSANITIZE - -// RUN: %clang_cc1 -fsanitize=implicit-integer-sign-change -fno-sanitize-recover=implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-NORECOVER,CHECK-SANITIZE-UNREACHABLE -// RUN: %clang_cc1 -fsanitize=implicit-integer-sign-change -fsanitize-recover=implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-RECOVER -// RUN: %clang_cc1 -fsanitize=implicit-integer-sign-change -fsanitize-trap=implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-TRAP,CHECK-SANITIZE-UNREACHABLE - -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 10 }, {{.*}}* 
@[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } - -unsigned short t0(unsigned short x) { -// CHECK-NOSANITIZE-LABEL: @t0( -// CHECK-NOSANITIZE-NEXT: entry: -// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 -// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] -// -// CHECK-SANITIZE-LABEL: @t0( -// CHECK-SANITIZE-NEXT: entry: -// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 -// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 -// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 -// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], false, !nosanitize -// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize -// 
CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: -// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize -// CHECK-SANITIZE: [[CONT]]: -// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] -#line 100 - return x++; -} -unsigned short t1(unsigned short x) { -// CHECK-NOSANITIZE-LABEL: @t1( -// CHECK-NOSANITIZE-NEXT: entry: -// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 -// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] -// -// CHECK-SANITIZE-LABEL: @t1( -// CHECK-SANITIZE-NEXT: entry: -// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 -// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 -// 
CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 -// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 -// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], false, !nosanitize -// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize -// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: -// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize -// CHECK-SANITIZE: [[CONT]]: -// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] -#line 200 - return x--; -} - -unsigned short t2(unsigned short x) { -// CHECK-NOSANITIZE-LABEL: @t2( -// CHECK-NOSANITIZE-NEXT: entry: -// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 -// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] -// -// 
CHECK-SANITIZE-LABEL: @t2( -// CHECK-SANITIZE-NEXT: entry: -// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 -// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 -// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 -// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], false, !nosanitize -// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize -// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: -// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize -// CHECK-SANITIZE: [[CONT]]: -// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] -#line 300 - return ++x; -} - -unsigned short t3(unsigned short x) { -// CHECK-NOSANITIZE-LABEL: @t3( -// CHECK-NOSANITIZE-NEXT: 
entry: -// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 -// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] -// -// CHECK-SANITIZE-LABEL: @t3( -// CHECK-SANITIZE-NEXT: entry: -// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 -// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 -// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 -// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], false, !nosanitize -// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize -// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: -// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// 
CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize -// CHECK-SANITIZE: [[CONT]]: -// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] -#line 400 - return --x; -} - -signed short t4(signed short x) { -// CHECK-NOSANITIZE-LABEL: @t4( -// CHECK-NOSANITIZE-NEXT: entry: -// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 -// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] -// -// CHECK-SANITIZE-LABEL: @t4( -// CHECK-SANITIZE-NEXT: entry: -// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 -// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 -// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 -// CHECK-SANITIZE-NEXT: [[DST_NEGATIVITYCHECK:%.*]] = icmp slt i16 [[X_PROMOTED_DEMOTED]], 0, !nosanitize !2 -// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], [[DST_NEGATIVITYCHECK]], !nosanitize -// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize -// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: -// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = 
zext i32 [[INC]] to i64, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize -// CHECK-SANITIZE: [[CONT]]: -// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] -#line 500 - return x++; -} -signed short t5(signed short x) { -// CHECK-NOSANITIZE-LABEL: @t5( -// CHECK-NOSANITIZE-NEXT: entry: -// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 -// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] -// -// CHECK-SANITIZE-LABEL: @t5( -// CHECK-SANITIZE-NEXT: entry: -// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 -// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 -// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 -// 
CHECK-SANITIZE-NEXT: [[DST_NEGATIVITYCHECK:%.*]] = icmp slt i16 [[X_PROMOTED_DEMOTED]], 0, !nosanitize !2 -// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], [[DST_NEGATIVITYCHECK]], !nosanitize -// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize -// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: -// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize -// CHECK-SANITIZE: [[CONT]]: -// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] -#line 600 - return x--; -} - -signed short t6(signed short x) { -// CHECK-NOSANITIZE-LABEL: @t6( -// CHECK-NOSANITIZE-NEXT: entry: -// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 -// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] -// -// CHECK-SANITIZE-LABEL: @t6( -// CHECK-SANITIZE-NEXT: entry: -// 
CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 -// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 -// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 -// CHECK-SANITIZE-NEXT: [[DST_NEGATIVITYCHECK:%.*]] = icmp slt i16 [[X_PROMOTED_DEMOTED]], 0, !nosanitize !2 -// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], [[DST_NEGATIVITYCHECK]], !nosanitize -// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize -// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: -// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize -// CHECK-SANITIZE: [[CONT]]: -// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] -#line 700 - return ++x; -} - -signed short t7(signed short x) { 
-// CHECK-NOSANITIZE-LABEL: @t7( -// CHECK-NOSANITIZE-NEXT: entry: -// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 -// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] -// -// CHECK-SANITIZE-LABEL: @t7( -// CHECK-SANITIZE-NEXT: entry: -// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 -// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 -// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 -// CHECK-SANITIZE-NEXT: [[DST_NEGATIVITYCHECK:%.*]] = icmp slt i16 [[X_PROMOTED_DEMOTED]], 0, !nosanitize !2 -// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], [[DST_NEGATIVITYCHECK]], !nosanitize -// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize -// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: -// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// 
CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize -// CHECK-SANITIZE: [[CONT]]: -// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] -#line 800 - return --x; -} diff --git a/clang/test/CodeGen/catch-implicit-integer-truncations-incdec-basics.c b/clang/test/CodeGen/catch-implicit-integer-truncations-incdec-basics.c deleted file mode 100644 index 6ac2be6d9fd0c..0000000000000 --- a/clang/test/CodeGen/catch-implicit-integer-truncations-incdec-basics.c +++ /dev/null @@ -1,139 +0,0 @@ -// RUN: %clang_cc1 -fsanitize=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation -fsanitize-recover=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK - -// CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } -// CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } -// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } -// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, {{.*}}* 
@[[INT]], {{.*}}* @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } -// CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } -// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } -// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } - -// CHECK-LABEL: @t0( -unsigned short t0(unsigned short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*) -#line 100 - x++; - return x; -} -// CHECK-LABEL: @t1( -unsigned short t1(unsigned short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*) -#line 200 - x--; - return x; -} -// CHECK-LABEL: @t2( -unsigned short t2(unsigned short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*) -#line 300 - ++x; - return x; -} -// CHECK-LABEL: @t3( 
-unsigned short t3(unsigned short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*) -#line 400 - --x; - return x; -} - -// CHECK-LABEL: @t4( -signed short t4(signed short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*) -#line 500 - x++; - return x; -} -// CHECK-LABEL: @t5( -signed short t5(signed short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*) -#line 600 - x--; - return x; -} -// CHECK-LABEL: @t6( -signed short t6(signed short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*) -#line 700 - ++x; - return x; -} -// CHECK-LABEL: @t7( -signed short t7(signed short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*) -#line 800 - --x; - return x; -} - -// CHECK-LABEL: @t8( -unsigned char t8(unsigned char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_900]] to i8*) -#line 900 - x++; - return x; -} -// CHECK-LABEL: @t9( -unsigned char t9(unsigned char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1000]] to i8*) -#line 1000 - x--; - return x; -} -// CHECK-LABEL: @t10( -unsigned char t10(unsigned char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1100]] to i8*) -#line 1100 - ++x; - return x; -} -// CHECK-LABEL: @t11( -unsigned char t11(unsigned char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1200]] to i8*) -#line 1200 - --x; - 
return x; -} - -// CHECK-LABEL: @t12( -signed char t12(signed char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1300]] to i8*) -#line 1300 - x++; - return x; -} -// CHECK-LABEL: @t13( -signed char t13(signed char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1400]] to i8*) -#line 1400 - x--; - return x; -} -// CHECK-LABEL: @t14( -signed char t14(signed char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1500]] to i8*) -#line 1500 - ++x; - return x; -} -// CHECK-LABEL: @t15( -signed char t15(signed char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1600]] to i8*) -#line 1600 - --x; - return x; -} diff --git a/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec-basics.c b/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec-basics.c deleted file mode 100644 index b7e438c7229ce..0000000000000 --- a/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec-basics.c +++ /dev/null @@ -1,139 +0,0 @@ -// RUN: %clang_cc1 -fsanitize=implicit-signed-integer-truncation -fsanitize-recover=implicit-signed-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK - -// CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } -// CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } -// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } -// 
CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } -// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } -// CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } -// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } -// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } - -// CHECK-LABEL: @t0( -unsigned short t0(unsigned short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*) -#line 100 - x++; - return x; -} -// CHECK-LABEL: @t1( -unsigned short t1(unsigned short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ 
{{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*) -#line 200 - x--; - return x; -} -// CHECK-LABEL: @t2( -unsigned short t2(unsigned short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*) -#line 300 - ++x; - return x; -} -// CHECK-LABEL: @t3( -unsigned short t3(unsigned short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*) -#line 400 - --x; - return x; -} - -// CHECK-LABEL: @t4( -signed short t4(signed short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*) -#line 500 - x++; - return x; -} -// CHECK-LABEL: @t5( -signed short t5(signed short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*) -#line 600 - x--; - return x; -} -// CHECK-LABEL: @t6( -signed short t6(signed short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*) -#line 700 - ++x; - return x; -} -// CHECK-LABEL: @t7( -signed short t7(signed short x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*) -#line 800 - --x; - return x; -} - -// CHECK-LABEL: @t8( -unsigned char t8(unsigned char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_900]] to i8*) -#line 900 - x++; - return x; -} -// CHECK-LABEL: @t9( -unsigned char t9(unsigned char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1000]] to i8*) -#line 1000 - x--; - return x; -} -// CHECK-LABEL: @t10( -unsigned char t10(unsigned char x) { - // CHECK: call void 
@__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1100]] to i8*) -#line 1100 - ++x; - return x; -} -// CHECK-LABEL: @t11( -unsigned char t11(unsigned char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1200]] to i8*) -#line 1200 - --x; - return x; -} - -// CHECK-LABEL: @t12( -signed char t12(signed char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1300]] to i8*) -#line 1300 - x++; - return x; -} -// CHECK-LABEL: @t13( -signed char t13(signed char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1400]] to i8*) -#line 1400 - x--; - return x; -} -// CHECK-LABEL: @t14( -signed char t14(signed char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1500]] to i8*) -#line 1500 - ++x; - return x; -} -// CHECK-LABEL: @t15( -signed char t15(signed char x) { - // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1600]] to i8*) -#line 1600 - --x; - return x; -} diff --git a/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec.c b/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec.c deleted file mode 100644 index 1e0bad1844c50..0000000000000 --- a/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec.c +++ /dev/null @@ -1,303 +0,0 @@ -// RUN: %clang_cc1 -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NOSANITIZE - -// RUN: %clang_cc1 -fsanitize=implicit-signed-integer-truncation -fno-sanitize-recover=implicit-signed-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" 
--check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-NORECOVER,CHECK-SANITIZE-UNREACHABLE -// RUN: %clang_cc1 -fsanitize=implicit-signed-integer-truncation -fsanitize-recover=implicit-signed-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-RECOVER -// RUN: %clang_cc1 -fsanitize=implicit-signed-integer-truncation -fsanitize-trap=implicit-signed-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-TRAP,CHECK-SANITIZE-UNREACHABLE - -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_800:.*]] = {{.*}}, 
i32 800, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } - -unsigned short t0(unsigned short x) { -// CHECK-NOSANITIZE-LABEL: @t0( -// CHECK-NOSANITIZE-NEXT: entry: -// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 -// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] -// -// CHECK-SANITIZE-LABEL: @t0( -// CHECK-SANITIZE-NEXT: entry: -// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 -// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize -// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize -// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize -// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: -// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// 
CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize -// CHECK-SANITIZE: [[CONT]]: -// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] -#line 100 - return x++; -} -unsigned short t1(unsigned short x) { -// CHECK-NOSANITIZE-LABEL: @t1( -// CHECK-NOSANITIZE-NEXT: entry: -// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 -// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] -// -// CHECK-SANITIZE-LABEL: @t1( -// CHECK-SANITIZE-NEXT: entry: -// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 -// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize -// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize -// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize -// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: -// CHECK-SANITIZE-TRAP-NEXT: call void 
@llvm.trap(){{.*}}, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize -// CHECK-SANITIZE: [[CONT]]: -// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] -#line 200 - return x--; -} - -unsigned short t2(unsigned short x) { -// CHECK-NOSANITIZE-LABEL: @t2( -// CHECK-NOSANITIZE-NEXT: entry: -// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 -// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] -// -// CHECK-SANITIZE-LABEL: @t2( -// CHECK-SANITIZE-NEXT: entry: -// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 -// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 -// CHECK-SANITIZE-NEXT: 
[[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize -// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize -// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize -// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: -// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize -// CHECK-SANITIZE: [[CONT]]: -// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] -#line 300 - return ++x; -} - -unsigned short t3(unsigned short x) { -// CHECK-NOSANITIZE-LABEL: @t3( -// CHECK-NOSANITIZE-NEXT: entry: -// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 -// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] -// -// CHECK-SANITIZE-LABEL: @t3( -// CHECK-SANITIZE-NEXT: entry: -// CHECK-SANITIZE-NEXT: 
[[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 -// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize -// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize -// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize -// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: -// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize -// CHECK-SANITIZE: [[CONT]]: -// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] -#line 400 - return --x; -} - -signed short t4(signed short x) { -// CHECK-NOSANITIZE-LABEL: @t4( -// CHECK-NOSANITIZE-NEXT: entry: -// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// 
CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 -// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] -// -// CHECK-SANITIZE-LABEL: @t4( -// CHECK-SANITIZE-NEXT: entry: -// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 -// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = sext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize -// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize -// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize -// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: -// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize -// 
CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize -// CHECK-SANITIZE: [[CONT]]: -// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] -#line 500 - return x++; -} -signed short t5(signed short x) { -// CHECK-NOSANITIZE-LABEL: @t5( -// CHECK-NOSANITIZE-NEXT: entry: -// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 -// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] -// -// CHECK-SANITIZE-LABEL: @t5( -// CHECK-SANITIZE-NEXT: entry: -// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 -// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = sext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize -// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize -// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize -// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: -// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void 
@__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize -// CHECK-SANITIZE: [[CONT]]: -// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] -#line 600 - return x--; -} - -signed short t6(signed short x) { -// CHECK-NOSANITIZE-LABEL: @t6( -// CHECK-NOSANITIZE-NEXT: entry: -// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 -// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] -// -// CHECK-SANITIZE-LABEL: @t6( -// CHECK-SANITIZE-NEXT: entry: -// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 -// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = sext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize -// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize -// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label 
%[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize -// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: -// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize -// CHECK-SANITIZE: [[CONT]]: -// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] -#line 700 - return ++x; -} - -signed short t7(signed short x) { -// CHECK-NOSANITIZE-LABEL: @t7( -// CHECK-NOSANITIZE-NEXT: entry: -// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 -// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 -// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] -// -// CHECK-SANITIZE-LABEL: @t7( -// CHECK-SANITIZE-NEXT: entry: -// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 -// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 -// 
CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 -// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = sext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize -// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize -// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize -// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: -// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize -// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize -// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize -// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize -// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize -// CHECK-SANITIZE: [[CONT]]: -// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 -// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] -#line 800 - return --x; -} diff --git a/clang/test/CodeGen/catch-implicit-unsigned-integer-truncations-incdec-basics.c b/clang/test/CodeGen/catch-implicit-unsigned-integer-truncations-incdec-basics.c deleted file mode 100644 index 7ad12314f3df0..0000000000000 --- a/clang/test/CodeGen/catch-implicit-unsigned-integer-truncations-incdec-basics.c +++ /dev/null @@ -1,101 +0,0 @@ -// RUN: %clang_cc1 -fsanitize=implicit-unsigned-integer-truncation 
-fsanitize-recover=implicit-unsigned-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK - -// CHECK-LABEL: @t0( -unsigned short t0(unsigned short x) { -#line 100 - x++; - return x; -} -// CHECK-LABEL: @t1( -unsigned short t1(unsigned short x) { -#line 200 - x--; - return x; -} -// CHECK-LABEL: @t2( -unsigned short t2(unsigned short x) { -#line 300 - ++x; - return x; -} -// CHECK-LABEL: @t3( -unsigned short t3(unsigned short x) { -#line 400 - --x; - return x; -} - -// CHECK-LABEL: @t4( -signed short t4(signed short x) { -#line 500 - x++; - return x; -} -// CHECK-LABEL: @t5( -signed short t5(signed short x) { -#line 600 - x--; - return x; -} -// CHECK-LABEL: @t6( -signed short t6(signed short x) { -#line 700 - ++x; - return x; -} -// CHECK-LABEL: @t7( -signed short t7(signed short x) { -#line 800 - --x; - return x; -} - -// CHECK-LABEL: @t8( -unsigned char t8(unsigned char x) { -#line 900 - x++; - return x; -} -// CHECK-LABEL: @t9( -unsigned char t9(unsigned char x) { -#line 1000 - x--; - return x; -} -// CHECK-LABEL: @t10( -unsigned char t10(unsigned char x) { -#line 1100 - ++x; - return x; -} -// CHECK-LABEL: @t11( -unsigned char t11(unsigned char x) { -#line 1200 - --x; - return x; -} - -// CHECK-LABEL: @t12( -signed char t12(signed char x) { -#line 1300 - x++; - return x; -} -// CHECK-LABEL: @t13( -signed char t13(signed char x) { -#line 1400 - x--; - return x; -} -// CHECK-LABEL: @t14( -signed char t14(signed char x) { -#line 1500 - ++x; - return x; -} -// CHECK-LABEL: @t15( -signed char t15(signed char x) { -#line 1600 - --x; - return x; -} diff --git a/compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-conversion-incdec.c b/compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-conversion-incdec.c deleted file mode 100644 index 0e62c02d3affb..0000000000000 --- 
a/compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-conversion-incdec.c +++ /dev/null @@ -1,122 +0,0 @@ -// RUN: %clang -x c -fsanitize=implicit-conversion -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK -// RUN: %clang -x c -fsanitize=implicit-conversion -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK -// RUN: %clang -x c -fsanitize=implicit-conversion -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK -// RUN: %clang -x c -fsanitize=implicit-conversion -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK - -// RUN: %clang -x c++ -fsanitize=implicit-conversion -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK -// RUN: %clang -x c++ -fsanitize=implicit-conversion -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK -// RUN: %clang -x c++ -fsanitize=implicit-conversion -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK -// RUN: %clang -x c++ -fsanitize=implicit-conversion -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK - -void test_unsigned() { - unsigned char x; - - x = 0; - x++; - x = 0; - ++x; - - x = 0; - x--; - // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'unsigned char' changed the value to 255 (8-bit, unsigned) - x = 0; - --x; - // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'unsigned char' changed the value to 255 (8-bit, unsigned) - - x = 1; - x++; - x = 1; - ++x; - - x = 1; - 
x--; - x = 1; - --x; - - x = 254; - x++; - x = 254; - ++x; - - x = 254; - x--; - x = 254; - --x; - - x = 255; - x++; - // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value 256 (32-bit, signed) to type 'unsigned char' changed the value to 0 (8-bit, unsigned) - x = 255; - ++x; - // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value 256 (32-bit, signed) to type 'unsigned char' changed the value to 0 (8-bit, unsigned) - - x = 255; - x--; - x = 255; - --x; -} - -void test_signed() { - signed char x; - - x = -128; - x++; - x = -128; - ++x; - - x = -128; - x--; - // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'signed char' changed the value to 127 (8-bit, signed) - x = -128; - --x; - // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'signed char' changed the value to 127 (8-bit, signed) - - x = -1; - x++; - x = -1; - ++x; - - x = -1; - x--; - x = -1; - --x; - - x = 0; - x++; - x = 0; - ++x; - - x = 0; - x--; - x = 0; - --x; - - x = 1; - x++; - x = 1; - ++x; - - x = 1; - x--; - x = 1; - --x; - - x = 127; - x++; - // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'signed char' changed the value to -128 (8-bit, signed) - x = 127; - ++x; - // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'signed char' changed the value to -128 (8-bit, signed) - - x = 127; - x--; - x = 127; - --x; -} - -int main() { - test_unsigned(); - test_signed(); - - return 0; -} diff --git a/compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-sign-change-incdec.c 
b/compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-sign-change-incdec.c deleted file mode 100644 index 4b56a105aa289..0000000000000 --- a/compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-sign-change-incdec.c +++ /dev/null @@ -1,120 +0,0 @@ -// RUN: %clang -x c -fsanitize=implicit-integer-sign-change -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK -// RUN: %clang -x c -fsanitize=implicit-integer-sign-change -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK -// RUN: %clang -x c -fsanitize=implicit-integer-sign-change -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK -// RUN: %clang -x c -fsanitize=implicit-integer-sign-change -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK - -// RUN: %clang -x c++ -fsanitize=implicit-integer-sign-change -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK -// RUN: %clang -x c++ -fsanitize=implicit-integer-sign-change -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK -// RUN: %clang -x c++ -fsanitize=implicit-integer-sign-change -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK -// RUN: %clang -x c++ -fsanitize=implicit-integer-sign-change -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK - -void test_unsigned() { - unsigned char x; - - x = 0; - x++; - x = 0; - ++x; - - x = 0; - x--; - // CHECK: {{.*}}integer-sign-change-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'unsigned char' changed the value to 255 (8-bit, unsigned) - x = 0; - --x; - // CHECK: 
{{.*}}integer-sign-change-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'unsigned char' changed the value to 255 (8-bit, unsigned) - - x = 1; - x++; - x = 1; - ++x; - - x = 1; - x--; - x = 1; - --x; - - x = 254; - x++; - x = 254; - ++x; - - x = 254; - x--; - x = 254; - --x; - - x = 255; - x++; - x = 255; - ++x; - - x = 255; - x--; - x = 255; - --x; -} - -void test_signed() { - signed char x; - - x = -128; - x++; - x = -128; - ++x; - - x = -128; - x--; - // CHECK: {{.*}}integer-sign-change-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'signed char' changed the value to 127 (8-bit, signed) - x = -128; - --x; - // CHECK: {{.*}}integer-sign-change-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'signed char' changed the value to 127 (8-bit, signed) - - x = -1; - x++; - x = -1; - ++x; - - x = -1; - x--; - x = -1; - --x; - - x = 0; - x++; - x = 0; - ++x; - - x = 0; - x--; - x = 0; - --x; - - x = 1; - x++; - x = 1; - ++x; - - x = 1; - x--; - x = 1; - --x; - - x = 127; - x++; - // CHECK: {{.*}}integer-sign-change-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'signed char' changed the value to -128 (8-bit, signed) - x = 127; - ++x; - // CHECK: {{.*}}integer-sign-change-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'signed char' changed the value to -128 (8-bit, signed) - - x = 127; - x--; - x = 127; - --x; -} - -int main() { - test_unsigned(); - test_signed(); - - return 0; -} diff --git a/compiler-rt/test/ubsan/TestCases/ImplicitConversion/signed-integer-truncation-incdec.c b/compiler-rt/test/ubsan/TestCases/ImplicitConversion/signed-integer-truncation-incdec.c deleted file mode 100644 index 4806efb24eb13..0000000000000 --- 
a/compiler-rt/test/ubsan/TestCases/ImplicitConversion/signed-integer-truncation-incdec.c +++ /dev/null @@ -1,122 +0,0 @@ -// RUN: %clang -x c -fsanitize=implicit-signed-integer-truncation -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK -// RUN: %clang -x c -fsanitize=implicit-signed-integer-truncation -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK -// RUN: %clang -x c -fsanitize=implicit-signed-integer-truncation -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK -// RUN: %clang -x c -fsanitize=implicit-signed-integer-truncation -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK - -// RUN: %clang -x c++ -fsanitize=implicit-signed-integer-truncation -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK -// RUN: %clang -x c++ -fsanitize=implicit-signed-integer-truncation -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK -// RUN: %clang -x c++ -fsanitize=implicit-signed-integer-truncation -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK -// RUN: %clang -x c++ -fsanitize=implicit-signed-integer-truncation -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK - -void test_unsigned() { - unsigned char x; - - x = 0; - x++; - x = 0; - ++x; - - x = 0; - x--; - // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'unsigned char' changed the value to 255 (8-bit, unsigned) - x = 0; - --x; - // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of 
value -1 (32-bit, signed) to type 'unsigned char' changed the value to 255 (8-bit, unsigned) - - x = 1; - x++; - x = 1; - ++x; - - x = 1; - x--; - x = 1; - --x; - - x = 254; - x++; - x = 254; - ++x; - - x = 254; - x--; - x = 254; - --x; - - x = 255; - x++; - // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value 256 (32-bit, signed) to type 'unsigned char' changed the value to 0 (8-bit, unsigned) - x = 255; - ++x; - // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value 256 (32-bit, signed) to type 'unsigned char' changed the value to 0 (8-bit, unsigned) - - x = 255; - x--; - x = 255; - --x; -} - -void test_signed() { - signed char x; - - x = -128; - x++; - x = -128; - ++x; - - x = -128; - x--; - // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'signed char' changed the value to 127 (8-bit, signed) - x = -128; - --x; - // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'signed char' changed the value to 127 (8-bit, signed) - - x = -1; - x++; - x = -1; - ++x; - - x = -1; - x--; - x = -1; - --x; - - x = 0; - x++; - x = 0; - ++x; - - x = 0; - x--; - x = 0; - --x; - - x = 1; - x++; - x = 1; - ++x; - - x = 1; - x--; - x = 1; - --x; - - x = 127; - x++; - // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'signed char' changed the value to -128 (8-bit, signed) - x = 127; - ++x; - // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'signed char' changed the value to -128 (8-bit, signed) - - x = 127; - x--; - x = 127; - 
--x; -} - -int main() { - test_unsigned(); - test_signed(); - - return 0; -} From 0f4383faa75fdeaeebe0c5156f927e9f88d61d53 Mon Sep 17 00:00:00 2001 From: Hideto Ueno Date: Wed, 27 Nov 2019 14:41:12 +0000 Subject: [PATCH 110/591] [Attributor] Handle special case when offset equals zero in nonnull deduction --- llvm/lib/Transforms/IPO/Attributor.cpp | 24 +++++++++++++++------ llvm/test/Transforms/FunctionAttrs/align.ll | 6 ++---- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 366c347daeb1e..faf0cdfd08ed3 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -308,15 +308,16 @@ static const Value *getPointerOperand(const Instruction *I) { return nullptr; } -static const Value *getBasePointerOfAccessPointerOperand(const Instruction *I, - int64_t &BytesOffset, - const DataLayout &DL) { +static const Value * +getBasePointerOfAccessPointerOperand(const Instruction *I, int64_t &BytesOffset, + const DataLayout &DL, + bool AllowNonInbounds = false) { const Value *Ptr = getPointerOperand(I); if (!Ptr) return nullptr; return GetPointerBaseWithConstantOffset(Ptr, BytesOffset, DL, - /*AllowNonInbounds*/ false); + AllowNonInbounds); } ChangeStatus AbstractAttribute::update(Attributor &A) { @@ -1702,8 +1703,7 @@ static int64_t getKnownNonNullAndDerefBytesForUse( return 0; } if (auto *GEP = dyn_cast(I)) - if (GEP->hasAllZeroIndices() || - (GEP->isInBounds() && GEP->hasAllConstantIndices())) { + if (GEP->hasAllConstantIndices()) { TrackUse = true; return 0; } @@ -1718,6 +1718,18 @@ static int64_t getKnownNonNullAndDerefBytesForUse( return std::max(int64_t(0), DerefBytes); } } + + /// Corner case when an offset is 0. 
+ if (const Value *Base = getBasePointerOfAccessPointerOperand( + I, Offset, DL, /*AllowNonInbounds*/ true)) { + if (Offset == 0 && Base == &AssociatedValue && + getPointerOperand(I) == UseV) { + int64_t DerefBytes = + (int64_t)DL.getTypeStoreSize(PtrTy->getPointerElementType()); + IsNonNull |= !NullPointerIsDefined; + return std::max(int64_t(0), DerefBytes); + } + } if (const Value *Base = GetPointerBaseWithConstantOffset(UseV, Offset, DL, /*AllowNonInbounds*/ false)) { diff --git a/llvm/test/Transforms/FunctionAttrs/align.ll b/llvm/test/Transforms/FunctionAttrs/align.ll index b8817a44fce79..a5bf91915baf8 100644 --- a/llvm/test/Transforms/FunctionAttrs/align.ll +++ b/llvm/test/Transforms/FunctionAttrs/align.ll @@ -351,8 +351,7 @@ define i64 @test12-1(i32* align 4 %p) { ret i64 %ret } -; FXIME: %p should have nonnull -; ATTRIBUTOR: define i64 @test12-2(i32* nocapture nofree readonly align 16 %p) +; ATTRIBUTOR: define i64 @test12-2(i32* nocapture nofree nonnull readonly align 16 dereferenceable(8) %p) define i64 @test12-2(i32* align 4 %p) { %p-cast = bitcast i32* %p to i64* %arrayidx0 = getelementptr i64, i64* %p-cast, i64 0 @@ -370,8 +369,7 @@ define void @test12-3(i32* align 4 %p) { ret void } -; FXIME: %p should have nonnull -; ATTRIBUTOR: define void @test12-4(i32* nocapture nofree writeonly align 16 %p) +; ATTRIBUTOR: define void @test12-4(i32* nocapture nofree nonnull writeonly align 16 dereferenceable(8) %p) define void @test12-4(i32* align 4 %p) { %p-cast = bitcast i32* %p to i64* %arrayidx0 = getelementptr i64, i64* %p-cast, i64 0 From 900d8a9a3b4efeefddd310e92219741d98e7270b Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Wed, 27 Nov 2019 15:47:44 +0100 Subject: [PATCH 111/591] [profile] Fix file contention causing dropped counts on Windows under -fprofile-generate See PR43425: https://bugs.llvm.org/show_bug.cgi?id=43425 When writing profile data on Windows we were opening profile file with exclusive read/write access. 
In case we are trying to write to the file from multiple processes simultaneously, subsequent calls to CreateFileA would return INVALID_HANDLE_VALUE. To fix this, I changed to open without exclusive access and then take a lock. Patch by Michael Holman! Differential revision: https://reviews.llvm.org/D70330 --- compiler-rt/lib/profile/InstrProfilingUtil.c | 9 +- .../Windows/Inputs/instrprof-multiprocess.c | 89 +++++++++++++++++++ .../Windows/instrprof-multiprocess.test | 10 +++ .../test/profile/Windows/lit.local.cfg.py | 9 ++ 4 files changed, 115 insertions(+), 2 deletions(-) create mode 100644 compiler-rt/test/profile/Windows/Inputs/instrprof-multiprocess.c create mode 100644 compiler-rt/test/profile/Windows/instrprof-multiprocess.test create mode 100644 compiler-rt/test/profile/Windows/lit.local.cfg.py diff --git a/compiler-rt/lib/profile/InstrProfilingUtil.c b/compiler-rt/lib/profile/InstrProfilingUtil.c index 13301f341fc5a..bf5a9670fe18c 100644 --- a/compiler-rt/lib/profile/InstrProfilingUtil.c +++ b/compiler-rt/lib/profile/InstrProfilingUtil.c @@ -207,8 +207,9 @@ COMPILER_RT_VISIBILITY FILE *lprofOpenFileEx(const char *ProfileName) { f = fdopen(fd, "r+b"); #elif defined(_WIN32) // FIXME: Use the wide variants to handle Unicode filenames. 
- HANDLE h = CreateFileA(ProfileName, GENERIC_READ | GENERIC_WRITE, 0, 0, - OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0); + HANDLE h = CreateFileA(ProfileName, GENERIC_READ | GENERIC_WRITE, + FILE_SHARE_READ | FILE_SHARE_WRITE, 0, OPEN_ALWAYS, + FILE_ATTRIBUTE_NORMAL, 0); if (h == INVALID_HANDLE_VALUE) return NULL; @@ -218,6 +219,10 @@ COMPILER_RT_VISIBILITY FILE *lprofOpenFileEx(const char *ProfileName) { return NULL; } + if (lprofLockFd(fd) != 0) + PROF_WARN("Data may be corrupted during profile merging : %s\n", + "Fail to obtain file lock due to system limit."); + f = _fdopen(fd, "r+b"); if (f == 0) { CloseHandle(h); diff --git a/compiler-rt/test/profile/Windows/Inputs/instrprof-multiprocess.c b/compiler-rt/test/profile/Windows/Inputs/instrprof-multiprocess.c new file mode 100644 index 0000000000000..774712d39738c --- /dev/null +++ b/compiler-rt/test/profile/Windows/Inputs/instrprof-multiprocess.c @@ -0,0 +1,89 @@ +/* This is a test case where the parent process forks 10 children + * which contend to merge profile data to the same file. With + * file locking support, the data from each child should not + * be lost. 
+ */ +#include +#include +#include + +void spawn_child(PROCESS_INFORMATION *pi, int child_num) { + wchar_t child_str[10]; + _itow(child_num, child_str, 10); + if (!SetEnvironmentVariableW(L"CHILD_NUM", child_str)) { + printf("SetEnvironmentVariableW failed (0x%8lx).\n", GetLastError()); + fflush(stdout); + exit(1); + } + + STARTUPINFOW si; + memset(&si, 0, sizeof(si)); + si.cb = sizeof(si); + + memset(pi, 0, sizeof(PROCESS_INFORMATION)); + + if (!CreateProcessW(NULL, // No module name (use command line) + GetCommandLineW(), // Command line + NULL, // Process handle not inheritable + NULL, // Thread handle not inheritable + TRUE, // Set handle inheritance to TRUE + 0, // No flags + NULL, // Use parent's environment block + NULL, // Use parent's starting directory + &si, pi)) { + printf("CreateProcess failed (0x%08lx).\n", GetLastError()); + fflush(stdout); + exit(1); + } +} + +int wait_child(PROCESS_INFORMATION *pi) { + WaitForSingleObject(pi->hProcess, INFINITE); + + DWORD exit_code; + if (!GetExitCodeProcess(pi->hProcess, &exit_code)) { + printf("GetExitCodeProcess failed (0x%08lx).\n", GetLastError()); + fflush(stdout); + exit(1); + } + + CloseHandle(pi->hProcess); + CloseHandle(pi->hThread); + + return exit_code; +} + +#define NUM_CHILDREN 10 + +int foo(int num) { + if (num < (NUM_CHILDREN / 2)) { + return 1; + } else if (num < NUM_CHILDREN) { + return 2; + } + return 3; +} + +int main(int argc, char *argv[]) { + char *child_str = getenv("CHILD_NUM"); + if (!child_str) { + PROCESS_INFORMATION child[NUM_CHILDREN]; + // In parent + for (int i = 0; i < NUM_CHILDREN; i++) { + spawn_child(&child[i], i); + } + for (int i = 0; i < NUM_CHILDREN; i++) { + wait_child(&child[i]); + } + return 0; + } else { + // In child + int child_num = atoi(child_str); + int result = foo(child_num); + if (result == 3) { + fprintf(stderr, "Invalid child count!"); + return 1; + } + return 0; + } +} diff --git a/compiler-rt/test/profile/Windows/instrprof-multiprocess.test 
b/compiler-rt/test/profile/Windows/instrprof-multiprocess.test new file mode 100644 index 0000000000000..ae5ebd45bec95 --- /dev/null +++ b/compiler-rt/test/profile/Windows/instrprof-multiprocess.test @@ -0,0 +1,10 @@ +RUN: %clang_profgen %S/Inputs/instrprof-multiprocess.c -o %t +RUN: rm -f %t_*.profraw +RUN: env LLVM_PROFILE_FILE=%t_%m.profraw %run %t +RUN: llvm-profdata show --counts -function=foo %t_*.profraw | FileCheck %s + +CHECK: Counters: +CHECK: foo: +CHECK: Function count: 10 +CHECK: Block counts: [5, 5] +CHECK: Functions shown: 1 diff --git a/compiler-rt/test/profile/Windows/lit.local.cfg.py b/compiler-rt/test/profile/Windows/lit.local.cfg.py new file mode 100644 index 0000000000000..e924d91c44934 --- /dev/null +++ b/compiler-rt/test/profile/Windows/lit.local.cfg.py @@ -0,0 +1,9 @@ +def getRoot(config): + if not config.parent: + return config + return getRoot(config.parent) + +root = getRoot(config) + +if root.host_os not in ['Windows']: + config.unsupported = True From f59614d906b5428f3687a44ee018df5840b301dd Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 21 Nov 2019 10:00:56 -0500 Subject: [PATCH 112/591] [OPENMP50]Add if clause in parallel for simd directive. According to OpenMP 5.0, if clause can be used in parallel for simd directive. If condition in the if clause if false, the non-vectorized version of the loop must be executed. 
--- clang/lib/Sema/SemaOpenMP.cpp | 7 +- .../test/OpenMP/parallel_for_simd_codegen.cpp | 167 +++++++++++++----- 2 files changed, 130 insertions(+), 44 deletions(-) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 59178fb671fb0..2773efcf3daea 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -4538,6 +4538,8 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( Res = ActOnOpenMPParallelForSimdDirective( ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA); AllowedNameModifiers.push_back(OMPD_parallel); + if (LangOpts.OpenMP >= 50) + AllowedNameModifiers.push_back(OMPD_simd); break; case OMPD_parallel_sections: Res = ActOnOpenMPParallelSectionsDirective(ClausesWithImplicit, AStmt, @@ -10677,11 +10679,14 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( if (NameModifier == OMPD_unknown || NameModifier == OMPD_taskloop) CaptureRegion = OMPD_parallel; break; + case OMPD_parallel_for_simd: + if (NameModifier == OMPD_unknown || NameModifier == OMPD_simd) + CaptureRegion = OMPD_parallel; + break; case OMPD_cancel: case OMPD_parallel: case OMPD_parallel_sections: case OMPD_parallel_for: - case OMPD_parallel_for_simd: case OMPD_target: case OMPD_target_simd: case OMPD_target_teams: diff --git a/clang/test/OpenMP/parallel_for_simd_codegen.cpp b/clang/test/OpenMP/parallel_for_simd_codegen.cpp index 9585bf293695c..01f2b4c42a243 100644 --- a/clang/test/OpenMP/parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_simd_codegen.cpp @@ -1,14 +1,24 @@ -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=OMP45 --check-prefix=CHECK // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o 
%t %s // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=OMP50 --check-prefix=CHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=50 -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG + // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x 
c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fopenmp-version=50 -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // expected-no-diagnostics +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} #ifndef HEADER #define HEADER @@ -75,7 +85,7 @@ void simple(float *a, float *b, float *c, float *d) { // CHECK: [[K0LOAD:%.+]] = load i64, i64* [[K_VAR:%[^,]+]] // CHECK-NEXT: store i64 [[K0LOAD]], i64* [[LIN0:%[^,]+]] -// CHECK: call void @__kmpc_dispatch_init_4(%struct.ident_t* {{.+}}, i32 %{{.+}}, i32 35, i32 0, i32 8, i32 1, i32 1) +// CHECK: call void @__kmpc_dispatch_init_4(%struct.ident_t* {{.+}}, i32 %{{.+}}, i32 {{35|1073741859}}, i32 0, i32 8, i32 1, i32 1) // CHECK: [[NEXT:%.+]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* {{.+}}, i32 %{{.+}}, i32* %{{.+}}, i32* [[LB:%.+]], i32* [[UB:%.+]], i32* %{{.+}}) // CHECK: [[COND:%.+]] = icmp ne i32 [[NEXT]], 0 // CHECK: br i1 [[COND]], label %[[CONT:.+]], label %[[END:.+]] @@ -386,6 +396,51 @@ void inst_templ1() { templ1 (a, z); } +// OMP50: call void @__kmpc_for_static_init_8(%struct.ident_t* {{[^,]+}}, i32 %{{[^,]+}}, i32 34, i32* %{{[^,]+}}, i64* [[LB:%[^,]+]], i64* [[UB:%[^,]+]], i64* [[STRIDE:%[^,]+]], i64 1, i64 1) +// OMP50: [[UB_VAL:%.+]] = load i64, i64* [[UB]], +// OMP50: [[CMP:%.+]] = icmp sgt i64 [[UB_VAL]], 15 +// OMP50: br i1 [[CMP]], label %[[TRUE:.+]], label %[[FALSE:[^,]+]] +// OMP50: [[TRUE]]: +// 
OMP50: br label %[[SWITCH:[^,]+]] +// OMP50: [[FALSE]]: +// OMP50: [[UB_VAL:%.+]] = load i64, i64* [[UB]], +// OMP50: br label %[[SWITCH]] +// OMP50: [[SWITCH]]: +// OMP50: [[UP:%.+]] = phi i64 [ 15, %[[TRUE]] ], [ [[UB_VAL]], %[[FALSE]] ] +// OMP50: store i64 [[UP]], i64* [[UB]], +// OMP50: [[LB_VAL:%.+]] = load i64, i64* [[LB]], +// OMP50: store i64 [[LB_VAL]], i64* [[T1_OMP_IV:%[^,]+]], + +// ... +// OMP50: [[IV:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP50-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]] +// OMP50-NEXT: [[CMP1:%.+]] = icmp sle i64 [[IV]], [[UB_VAL]] +// OMP50-NEXT: br i1 [[CMP1]], label %[[T1_BODY:.+]], label %[[T1_END:[^,]+]] +// OMP50: [[T1_BODY]]: +// Loop counters i and j updates: +// OMP50: [[IV1:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP50-NEXT: [[I_1:%.+]] = sdiv i64 [[IV1]], 4 +// OMP50-NEXT: [[I_1_MUL1:%.+]] = mul nsw i64 [[I_1]], 1 +// OMP50-NEXT: [[I_1_ADD0:%.+]] = add nsw i64 0, [[I_1_MUL1]] +// OMP50-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32 +// OMP50-NEXT: store i32 [[I_2]], i32* +// OMP50: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP50: [[IV2_1:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP50-NEXT: [[DIV_1:%.+]] = sdiv i64 [[IV2_1]], 4 +// OMP50-NEXT: [[MUL_1:%.+]] = mul nsw i64 [[DIV_1]], 4 +// OMP50-NEXT: [[J_1:%.+]] = sub nsw i64 [[IV2]], [[MUL_1]] +// OMP50-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2 +// OMP50-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]] +// OMP50-NEXT: store i64 [[J_2_ADD0]], i64* +// simd.for.inc: +// OMP50: [[IV3:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP50-NEXT: [[INC:%.+]] = add nsw i64 [[IV3]], 1 +// OMP50-NEXT: store i64 [[INC]], i64* +// OMP50-NEXT: br label {{%.+}} +// OMP50: [[T1_END]]: +// OMP50: call void @__kmpc_for_static_fini(%struct.ident_t* {{.+}}, i32 %{{.+}}) +// OMP50: ret void +// typedef int MyIdx; @@ -674,51 +729,77 @@ void widened(float *a, float *b, float *c, float *d) { // CHECK: ret void } -// CHECK: call void @__kmpc_for_static_init_8(%struct.ident_t* {{[^,]+}}, i32 
%{{[^,]+}}, i32 34, i32* %{{[^,]+}}, i64* [[LB:%[^,]+]], i64* [[UB:%[^,]+]], i64* [[STRIDE:%[^,]+]], i64 1, i64 1) -// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]], -// CHECK: [[CMP:%.+]] = icmp sgt i64 [[UB_VAL]], 15 -// CHECK: br i1 [[CMP]], label %[[TRUE:.+]], label %[[FALSE:[^,]+]] -// CHECK: [[TRUE]]: -// CHECK: br label %[[SWITCH:[^,]+]] -// CHECK: [[FALSE]]: -// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]], -// CHECK: br label %[[SWITCH]] -// CHECK: [[SWITCH]]: -// CHECK: [[UP:%.+]] = phi i64 [ 15, %[[TRUE]] ], [ [[UB_VAL]], %[[FALSE]] ] -// CHECK: store i64 [[UP]], i64* [[UB]], -// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]], -// CHECK: store i64 [[LB_VAL]], i64* [[T1_OMP_IV:%[^,]+]], +// CHECK-LABEL: if_clause +void if_clause(int a) { + #pragma omp parallel for simd if(a) schedule(static, 1) +for (int i = 0; i < 10; ++i); +} +// CHECK: call void @__kmpc_for_static_init_4( +// OMP50: [[COND:%.+]] = trunc i8 %{{.+}} to i1 +// OMP50: br i1 [[COND]], label {{%?}}[[THEN:.+]], label {{%?}}[[ELSE:.+]] + +// OMP50: [[THEN]]: +// OMP45: br label {{.+}}, !llvm.loop ![[VECT:.+]] +// OMP50: br label {{.+}}, !llvm.loop ![[VECT:.+]] +// OMP50: [[ELSE]]: +// OMP50: br label {{.+}}, !llvm.loop ![[NOVECT:.+]] +// CHECK: call void @__kmpc_for_static_fini( + +// OMP45: call void @__kmpc_for_static_init_8(%struct.ident_t* {{[^,]+}}, i32 %{{[^,]+}}, i32 34, i32* %{{[^,]+}}, i64* [[LB:%[^,]+]], i64* [[UB:%[^,]+]], i64* [[STRIDE:%[^,]+]], i64 1, i64 1) +// OMP45: [[UB_VAL:%.+]] = load i64, i64* [[UB]], +// OMP45: [[CMP:%.+]] = icmp sgt i64 [[UB_VAL]], 15 +// OMP45: br i1 [[CMP]], label %[[TRUE:.+]], label %[[FALSE:[^,]+]] +// OMP45: [[TRUE]]: +// OMP45: br label %[[SWITCH:[^,]+]] +// OMP45: [[FALSE]]: +// OMP45: [[UB_VAL:%.+]] = load i64, i64* [[UB]], +// OMP45: br label %[[SWITCH]] +// OMP45: [[SWITCH]]: +// OMP45: [[UP:%.+]] = phi i64 [ 15, %[[TRUE]] ], [ [[UB_VAL]], %[[FALSE]] ] +// OMP45: store i64 [[UP]], i64* [[UB]], +// OMP45: [[LB_VAL:%.+]] = load i64, i64* [[LB]], 
+// OMP45: store i64 [[LB_VAL]], i64* [[T1_OMP_IV:%[^,]+]], // ... -// CHECK: [[IV:%.+]] = load i64, i64* [[T1_OMP_IV]] -// CHECK-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]] -// CHECK-NEXT: [[CMP1:%.+]] = icmp sle i64 [[IV]], [[UB_VAL]] -// CHECK-NEXT: br i1 [[CMP1]], label %[[T1_BODY:.+]], label %[[T1_END:[^,]+]] -// CHECK: [[T1_BODY]]: +// OMP45: [[IV:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP45-NEXT: [[UB_VAL:%.+]] = load i64, i64* [[UB]] +// OMP45-NEXT: [[CMP1:%.+]] = icmp sle i64 [[IV]], [[UB_VAL]] +// OMP45-NEXT: br i1 [[CMP1]], label %[[T1_BODY:.+]], label %[[T1_END:[^,]+]] +// OMP45: [[T1_BODY]]: // Loop counters i and j updates: -// CHECK: [[IV1:%.+]] = load i64, i64* [[T1_OMP_IV]] -// CHECK-NEXT: [[I_1:%.+]] = sdiv i64 [[IV1]], 4 -// CHECK-NEXT: [[I_1_MUL1:%.+]] = mul nsw i64 [[I_1]], 1 -// CHECK-NEXT: [[I_1_ADD0:%.+]] = add nsw i64 0, [[I_1_MUL1]] -// CHECK-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32 -// CHECK-NEXT: store i32 [[I_2]], i32* -// CHECK: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]] -// CHECK: [[IV2_1:%.+]] = load i64, i64* [[T1_OMP_IV]] -// CHECK-NEXT: [[DIV_1:%.+]] = sdiv i64 [[IV2_1]], 4 -// CHECK-NEXT: [[MUL_1:%.+]] = mul nsw i64 [[DIV_1]], 4 -// CHECK-NEXT: [[J_1:%.+]] = sub nsw i64 [[IV2]], [[MUL_1]] -// CHECK-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2 -// CHECK-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]] -// CHECK-NEXT: store i64 [[J_2_ADD0]], i64* +// OMP45: [[IV1:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP45-NEXT: [[I_1:%.+]] = sdiv i64 [[IV1]], 4 +// OMP45-NEXT: [[I_1_MUL1:%.+]] = mul nsw i64 [[I_1]], 1 +// OMP45-NEXT: [[I_1_ADD0:%.+]] = add nsw i64 0, [[I_1_MUL1]] +// OMP45-NEXT: [[I_2:%.+]] = trunc i64 [[I_1_ADD0]] to i32 +// OMP45-NEXT: store i32 [[I_2]], i32* +// OMP45: [[IV2:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP45: [[IV2_1:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP45-NEXT: [[DIV_1:%.+]] = sdiv i64 [[IV2_1]], 4 +// OMP45-NEXT: [[MUL_1:%.+]] = mul nsw i64 [[DIV_1]], 4 +// OMP45-NEXT: [[J_1:%.+]] = sub nsw i64 
[[IV2]], [[MUL_1]] +// OMP45-NEXT: [[J_2:%.+]] = mul nsw i64 [[J_1]], 2 +// OMP45-NEXT: [[J_2_ADD0:%.+]] = add nsw i64 0, [[J_2]] +// OMP45-NEXT: store i64 [[J_2_ADD0]], i64* // simd.for.inc: -// CHECK: [[IV3:%.+]] = load i64, i64* [[T1_OMP_IV]] -// CHECK-NEXT: [[INC:%.+]] = add nsw i64 [[IV3]], 1 -// CHECK-NEXT: store i64 [[INC]], i64* -// CHECK-NEXT: br label {{%.+}} -// CHECK: [[T1_END]]: -// CHECK: call void @__kmpc_for_static_fini(%struct.ident_t* {{.+}}, i32 %{{.+}}) -// CHECK: ret void +// OMP45: [[IV3:%.+]] = load i64, i64* [[T1_OMP_IV]] +// OMP45-NEXT: [[INC:%.+]] = add nsw i64 [[IV3]], 1 +// OMP45-NEXT: store i64 [[INC]], i64* +// OMP45-NEXT: br label {{%.+}} +// OMP45: [[T1_END]]: +// OMP45: call void @__kmpc_for_static_fini(%struct.ident_t* {{.+}}, i32 %{{.+}}) +// OMP45: ret void // + +// OMP45-NOT: !{!"llvm.loop.vectorize.enable", i1 false} +// OMP45-DAG: ![[VECT]] = distinct !{![[VECT]], ![[VM:.+]]} +// OMP45-DAG: ![[VM]] = !{!"llvm.loop.vectorize.enable", i1 true} +// OMP45-NOT: !{!"llvm.loop.vectorize.enable", i1 false} +// OMP50-DAG: ![[VECT]] = distinct !{![[VECT]], ![[VM:.+]]} +// OMP50-DAG: ![[VM]] = !{!"llvm.loop.vectorize.enable", i1 true} +// OMP50-DAG: ![[NOVECT]] = distinct !{![[NOVECT]], ![[NOVM:.+]]} +// OMP50-DAG: ![[NOVM]] = !{!"llvm.loop.vectorize.enable", i1 false} + // TERM_DEBUG-LABEL: bar int bar() {return 0;}; From 703c26f03be74daf6e483380e6b23029a3851081 Mon Sep 17 00:00:00 2001 From: marshall Date: Wed, 27 Nov 2019 07:13:00 -0800 Subject: [PATCH 113/591] Optimize and fix basic_string move assignment operator. Reviewed as https://reviews.llvm.org/D68623. Thanks to mvels for the patch. 
--- libcxx/include/string | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/libcxx/include/string b/libcxx/include/string index c16dbedc51c0f..4e0b21135a7e6 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -2289,10 +2289,20 @@ basic_string<_CharT, _Traits, _Allocator>::__move_assign(basic_string& __str, tr _NOEXCEPT_(is_nothrow_move_assignable::value) #endif { - __clear_and_shrink(); - __r_.first() = __str.__r_.first(); - __move_assign_alloc(__str); - __str.__zero(); + if (__is_long()) { + __alloc_traits::deallocate(__alloc(), __get_long_pointer(), + __get_long_cap()); +#if _LIBCPP_STD_VER <= 14 + if (!is_nothrow_move_assignable::value) { + __set_short_size(0); + traits_type::assign(__get_short_pointer()[0], value_type()); + } +#endif + } + __move_assign_alloc(__str); + __r_.first() = __str.__r_.first(); + __str.__set_short_size(0); + traits_type::assign(__str.__get_short_pointer()[0], value_type()); } template From 939544add98ee6463d6abd6c28fa6c9ac4b6e104 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Wed, 27 Nov 2019 16:22:16 +0100 Subject: [PATCH 114/591] [clangd] Handle the missing call expr in targetDecl. 
Reviewers: sammccall Reviewed By: sammccall Subscribers: merge_guards_bot, ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D70773 --- clang-tools-extra/clangd/FindTarget.cpp | 3 +++ .../clangd/unittests/FindTargetTests.cpp | 17 +++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/clang-tools-extra/clangd/FindTarget.cpp b/clang-tools-extra/clangd/FindTarget.cpp index c536cbf75e5c0..3e55a6a9cdc68 100644 --- a/clang-tools-extra/clangd/FindTarget.cpp +++ b/clang-tools-extra/clangd/FindTarget.cpp @@ -175,6 +175,9 @@ struct TargetFinder { RelSet Flags; Visitor(TargetFinder &Outer, RelSet Flags) : Outer(Outer), Flags(Flags) {} + void VisitCallExpr(const CallExpr *CE) { + Outer.add(CE->getCalleeDecl(), Flags); + } void VisitDeclRefExpr(const DeclRefExpr *DRE) { const Decl *D = DRE->getDecl(); // UsingShadowDecl allows us to record the UsingDecl. diff --git a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp index f6e5fe723ec71..620eb3d6d3d69 100644 --- a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp +++ b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp @@ -114,6 +114,23 @@ TEST_F(TargetDeclTest, Exprs) { auto X = S() [[+]] S(); )cpp"; EXPECT_DECLS("DeclRefExpr", "S operator+(S) const"); + + Code = R"cpp( + int foo(); + int s = foo[[()]]; + )cpp"; + EXPECT_DECLS("CallExpr", "int foo()"); + + Code = R"cpp( + struct X { + void operator()(int n); + }; + void test() { + X x; + x[[(123)]]; + } + )cpp"; + EXPECT_DECLS("CXXOperatorCallExpr", "void operator()(int n)"); } TEST_F(TargetDeclTest, UsingDecl) { From c13c5fea012e5bb5b0503f2c126b13f814e96873 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 27 Nov 2019 16:04:15 +0000 Subject: [PATCH 115/591] Remove a comment obsoleted by r227345. 
--- llvm/include/llvm/Support/CommandLine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h index 63784463e1718..faef60c4e47f4 100644 --- a/llvm/include/llvm/Support/CommandLine.h +++ b/llvm/include/llvm/Support/CommandLine.h @@ -1831,7 +1831,7 @@ void PrintHelpMessage(bool Hidden = false, bool Categorized = false); // /// Use this to get a StringMap to all registered named options -/// (e.g. -help). Note \p Map Should be an empty StringMap. +/// (e.g. -help). /// /// \return A reference to the StringMap used by the cl APIs to parse options. /// From 5e6b728763e46eb2ec7a55b7f5586dae90c5aadf Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 27 Nov 2019 11:11:28 -0500 Subject: [PATCH 116/591] [InstCombine] add tests for copysign; NFC --- llvm/test/Transforms/InstCombine/copysign.ll | 41 ++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/copysign.ll diff --git a/llvm/test/Transforms/InstCombine/copysign.ll b/llvm/test/Transforms/InstCombine/copysign.ll new file mode 100644 index 0000000000000..5d30f346807df --- /dev/null +++ b/llvm/test/Transforms/InstCombine/copysign.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -instcombine < %s | FileCheck %s + +declare float @llvm.copysign.f32(float, float) +declare <3 x double> @llvm.copysign.v3f64(<3 x double>, <3 x double>) + +define float @positive_sign_arg(float %x) { +; CHECK-LABEL: @positive_sign_arg( +; CHECK-NEXT: [[R:%.*]] = call arcp float @llvm.copysign.f32(float [[X:%.*]], float 0.000000e+00) +; CHECK-NEXT: ret float [[R]] +; + %r = call arcp float @llvm.copysign.f32(float %x, float 0.0) + ret float %r +} + +define <3 x double> @positive_sign_arg_vec_splat(<3 x double> %x) { +; CHECK-LABEL: @positive_sign_arg_vec_splat( +; CHECK-NEXT: [[R:%.*]] = call ninf <3 x double> @llvm.copysign.v3f64(<3 x 
double> [[X:%.*]], <3 x double> ) +; CHECK-NEXT: ret <3 x double> [[R]] +; + %r = call ninf <3 x double> @llvm.copysign.v3f64(<3 x double> %x, <3 x double> ) + ret <3 x double> %r +} + +define float @negative_sign_arg(float %x) { +; CHECK-LABEL: @negative_sign_arg( +; CHECK-NEXT: [[R:%.*]] = call nnan float @llvm.copysign.f32(float [[X:%.*]], float -0.000000e+00) +; CHECK-NEXT: ret float [[R]] +; + %r = call nnan float @llvm.copysign.f32(float %x, float -0.0) + ret float %r +} + +define <3 x double> @negative_sign_arg_vec_splat(<3 x double> %x) { +; CHECK-LABEL: @negative_sign_arg_vec_splat( +; CHECK-NEXT: [[R:%.*]] = call fast <3 x double> @llvm.copysign.v3f64(<3 x double> [[X:%.*]], <3 x double> ) +; CHECK-NEXT: ret <3 x double> [[R]] +; + %r = call fast <3 x double> @llvm.copysign.v3f64(<3 x double> %x, <3 x double> ) + ret <3 x double> %r +} From f4bba07b87ce7ad60d908d2fe02abe88d2d48fa4 Mon Sep 17 00:00:00 2001 From: Mark Murray Date: Wed, 13 Nov 2019 16:57:28 +0000 Subject: [PATCH 117/591] [ARM][MVE][Intrinsics] Add MVE VABD intrinsics. Add unit tests. Summary: Add MVE VABD intrinsics. Add unit tests. 
Reviewers: simon_tatham, ostannard, dmgreen Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D70545 --- clang/include/clang/Basic/arm_mve.td | 9 ++ clang/test/CodeGen/arm-mve-intrinsics/vabdq.c | 95 +++++++++++++++++++ llvm/include/llvm/IR/IntrinsicsARM.td | 6 ++ llvm/lib/Target/ARM/ARMInstrMVE.td | 62 ++++++++++-- .../CodeGen/Thumb2/mve-intrinsics/vabdq.ll | 62 ++++++++++++ 5 files changed, 225 insertions(+), 9 deletions(-) create mode 100644 clang/test/CodeGen/arm-mve-intrinsics/vabdq.c create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index d8d199f464d93..0d827485ae406 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -28,6 +28,7 @@ foreach n = [ 2, 4 ] in { "Intrinsic::arm_mve_vld"#n#"q":$IRIntr)>; } + let params = T.Int in { def vaddq: Intrinsic; def vsubq: Intrinsic; @@ -41,6 +42,14 @@ def vsubqf: Intrinsic, } let params = T.Usual in { +def vabdq: Intrinsic $a, $b)>; +} + +let params = T.Usual in { +def vabdq_m: Intrinsic< + Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred), + (IRInt<"abd_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>; def vaddq_m: Intrinsic< Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred), (IRInt<"add_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>; diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vabdq.c b/clang/test/CodeGen/arm-mve-intrinsics/vabdq.c new file mode 100644 index 0000000000000..a416bfb773e6b --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vabdq.c @@ -0,0 +1,95 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o 
- %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_vabdq_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vabd.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +int8x16_t test_vabdq_s8(int8x16_t a, int8x16_t b) +{ +#ifdef POLYMORPHIC + return vabdq(a, b); +#else /* POLYMORPHIC */ + return vabdq_s8(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabdq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vabdq_u32(uint32x4_t a, uint32x4_t b) +{ +#ifdef POLYMORPHIC + return vabdq(a, b); +#else /* POLYMORPHIC */ + return vabdq_u32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabdq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vabd.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP0]] +// +float16x8_t test_vabdq_f32(float16x8_t a, float16x8_t b) +{ +#ifdef POLYMORPHIC + return vabdq(a, b); +#else /* POLYMORPHIC */ + return vabdq_f16(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabdq_m_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.abd.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vabdq_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) +{ +#ifdef 
POLYMORPHIC + return vabdq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vabdq_m_u16(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabdq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vabdq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vabdq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vabdq_m_s8(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabdq_m_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.abd.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vabdq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vabdq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vabdq_m_f32(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index 31069666b1e9e..3a69b87ea342d 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -796,6 +796,9 @@ multiclass IntrinsicSignSuffix rets, list params = [], def _u: Intrinsic; } +def int_arm_mve_abd_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>; def 
int_arm_mve_add_predicated: Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; @@ -866,6 +869,9 @@ defm int_arm_mve_sqrshr: ARM_MVE_qrshift<[llvm_i32_ty]>; def int_arm_mve_lsll: ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty]>; def int_arm_mve_asrl: ARM_MVE_qrshift_single<[llvm_i32_ty, llvm_i32_ty]>; +def int_arm_mve_vabd: Intrinsic< + [llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_arm_mve_vadc: Intrinsic< [llvm_anyvector_ty, llvm_i32_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>; diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 31fee84596b3c..9c658047dba6f 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1664,7 +1664,8 @@ let Predicates = [HasMVEInt] in { } -class MVE_VABD_int size, list pattern=[]> +class MVE_VABD_int size, + list pattern=[]> : MVE_int<"vabd", suffix, size, pattern> { let Inst{28} = U; @@ -1676,12 +1677,35 @@ class MVE_VABD_int size, list pattern=[]> let validForTailPredication = 1; } -def MVE_VABDs8 : MVE_VABD_int<"s8", 0b0, 0b00>; -def MVE_VABDs16 : MVE_VABD_int<"s16", 0b0, 0b01>; -def MVE_VABDs32 : MVE_VABD_int<"s32", 0b0, 0b10>; -def MVE_VABDu8 : MVE_VABD_int<"u8", 0b1, 0b00>; -def MVE_VABDu16 : MVE_VABD_int<"u16", 0b1, 0b01>; -def MVE_VABDu32 : MVE_VABD_int<"u32", 0b1, 0b10>; +multiclass MVE_VABD_m { + def "" : MVE_VABD_int; + + let Predicates = [HasMVEInt] in { + // Unpredicated absolute difference + def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))), + (VTI.Vec (!cast(NAME) + (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; + + // Predicated absolute difference + def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))), + (VTI.Vec (!cast(NAME) + (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + (i32 1), (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive)))>; + 
} +} + +multiclass MVE_VABD + : MVE_VABD_m; + +defm MVE_VABDs8 : MVE_VABD; +defm MVE_VABDs16 : MVE_VABD; +defm MVE_VABDs32 : MVE_VABD; +defm MVE_VABDu8 : MVE_VABD; +defm MVE_VABDu16 : MVE_VABD; +defm MVE_VABDu32 : MVE_VABD; class MVE_VRHADD size, list pattern=[]> : MVE_int<"vrhadd", suffix, size, pattern> { @@ -2950,8 +2974,28 @@ class MVE_VABD_fp let validForTailPredication = 1; } -def MVE_VABDf32 : MVE_VABD_fp<"f32", 0b0>; -def MVE_VABDf16 : MVE_VABD_fp<"f16", 0b1>; +multiclass MVE_VABDT_fp_m { + def "" : MVE_VABD_fp; + + let Predicates = [HasMVEFloat] in { + def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))), + (VTI.Vec (!cast(NAME) + (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; + def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))), + (VTI.Vec (!cast(NAME) + (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + (i32 1), (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive)))>; + } +} + +multiclass MVE_VABD_fp_m + : MVE_VABDT_fp_m; + +defm MVE_VABDf32 : MVE_VABD_fp_m; +defm MVE_VABDf16 : MVE_VABD_fp_m; class MVE_VCVT_fix pattern=[]> diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll new file mode 100644 index 0000000000000..bafff00ea1de9 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vabdq.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <4 x i32> @test_vabdq_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vabdq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vabd.s32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>%a, <4 x i32>%b) + ret <4 x i32> %0 +} + +declare <4 x i32> @llvm.arm.mve.vabd.v4i32(<4 x i32>, <4 x i32>) + +define arm_aapcs_vfpcc <4 x float> @test_vabdq_f32(<4 x float> %a, 
<4 x float> %b) { +; CHECK-LABEL: test_vabdq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vabd.f32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.arm.mve.vabd.v4f32(<4 x float>%a, <4 x float>%b) + ret <4 x float> %0 +} + +declare <4 x float> @llvm.arm.mve.vabd.v4f32(<4 x float>, <4 x float>) + +define arm_aapcs_vfpcc <16 x i8> @test_vabdq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vabdq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vabdt.s8 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) + +declare <16 x i8> @llvm.arm.mve.abd.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) + +define arm_aapcs_vfpcc <8 x half> @test_vabdq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vabdq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vabdt.f16 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.abd.predicated.v8f16.v8i1(<8 x half> %a, <8 x half> %b, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) + +declare <8 x half> @llvm.arm.mve.abd.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>, <8 x half>) From e8a8dbe9c45869c37712652f5d0244414021de36 Mon Sep 17 00:00:00 2001 From: Mark Murray Date: Mon, 25 Nov 2019 14:10:59 +0000 Subject: [PATCH 118/591] [ARM][MVE][Intrinsics] Add MVE VMUL intrinsics. Remove annoying "t1" from VMUL* instructions. Add unit tests. 
Summary: Add MVE VMUL intrinsics. Remove annoying "t1" from VMUL* instructions. Add unit tests. Reviewers: simon_tatham, ostannard, dmgreen Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D70546 --- clang/include/clang/Basic/arm_mve.td | 6 + clang/include/clang/Basic/arm_mve_defs.td | 2 + clang/test/CodeGen/arm-mve-intrinsics/vmulq.c | 125 ++++++++++++++++++ llvm/include/llvm/IR/IntrinsicsARM.td | 3 + llvm/lib/Target/ARM/ARMInstrMVE.td | 70 +++++++--- .../CodeGen/Thumb2/LowOverheadLoops/wlstp.mir | 12 +- .../CodeGen/Thumb2/mve-intrinsics/vmulq.ll | 58 ++++++++ .../unittests/Target/ARM/MachineInstrTest.cpp | 6 +- 8 files changed, 252 insertions(+), 30 deletions(-) create mode 100644 clang/test/CodeGen/arm-mve-intrinsics/vmulq.c create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulq.ll diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 0d827485ae406..2e47807ecce80 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -32,6 +32,7 @@ foreach n = [ 2, 4 ] in { let params = T.Int in { def vaddq: Intrinsic; def vsubq: Intrinsic; +def vmulq: Intrinsic; } let params = T.Float in { @@ -39,6 +40,8 @@ def vaddqf: Intrinsic, NameOverride<"vaddq">; def vsubqf: Intrinsic, NameOverride<"vsubq">; +def vmulqf: Intrinsic, + NameOverride<"vmulq">; } let params = T.Usual in { @@ -56,6 +59,9 @@ def vaddq_m: Intrinsic< def vsubq_m: Intrinsic< Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred), (IRInt<"sub_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>; +def vmulq_m: Intrinsic< + Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred), + (IRInt<"mul_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>; } let params = T.Int in { diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index 
27cdada02ec4f..bd0459dc3b163 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -58,12 +58,14 @@ class CGHelperFn : IRBuilderBase { let prefix = func # "(Builder, "; } def add: IRBuilder<"CreateAdd">; +def mul: IRBuilder<"CreateMul">; def or: IRBuilder<"CreateOr">; def and: IRBuilder<"CreateAnd">; def sub: IRBuilder<"CreateSub">; def shl: IRBuilder<"CreateShl">; def lshr: IRBuilder<"CreateLShr">; def fadd: IRBuilder<"CreateFAdd">; +def fmul: IRBuilder<"CreateFMul">; def fsub: IRBuilder<"CreateFSub">; def load: IRBuilder<"CreateLoad"> { let special_params = [IRBuilderAddrParam<0>]; diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c new file mode 100644 index 0000000000000..ac457cba81ebc --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmulq.c @@ -0,0 +1,125 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_vmulq_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = mul <16 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +uint8x16_t test_vmulq_u8(uint8x16_t a, uint8x16_t b) +{ +#ifdef POLYMORPHIC + return vmulq(a, b); +#else /* POLYMORPHIC */ + return vmulq_u8(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmulq_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = mul <8 x i16> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +int16x8_t test_vmulq_s16(int16x8_t a, int16x8_t b) +{ +#ifdef 
POLYMORPHIC + return vmulq(a, b); +#else /* POLYMORPHIC */ + return vmulq_s16(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmulq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vmulq_u32(uint32x4_t a, uint32x4_t b) +{ +#ifdef POLYMORPHIC + return vmulq(a, b); +#else /* POLYMORPHIC */ + return vmulq_u32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmulq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = fmul <4 x float> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: ret <4 x float> [[TMP0]] +// +float32x4_t test_vmulq_f32(float32x4_t a, float32x4_t b) +{ +#ifdef POLYMORPHIC + return vmulq(a, b); +#else /* POLYMORPHIC */ + return vmulq_f32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmulq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vmulq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmulq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vmulq_m_s8(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmulq_m_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mul.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vmulq_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, 
mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmulq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vmulq_m_u16(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmulq_m_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vmulq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmulq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vmulq_m_s32(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmulq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vmulq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmulq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vmulq_m_f16(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index 3a69b87ea342d..ff022fd3435ba 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -805,6 +805,9 @@ def int_arm_mve_add_predicated: Intrinsic<[llvm_anyvector_ty], def int_arm_mve_sub_predicated: Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; +def 
int_arm_mve_mul_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>; defm int_arm_mve_minv: IntrinsicSignSuffix<[llvm_i32_ty], [llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>; diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 9c658047dba6f..6d4cca5137bd5 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1512,8 +1512,9 @@ class MVE_int size, list pattern=[]> let Inst{3-1} = Qm{2-0}; } -class MVE_VMULt1 size, list pattern=[]> - : MVE_int<"vmul", suffix, size, pattern> { +class MVE_VMULt1 size, + list pattern=[]> + : MVE_int { let Inst{28} = 0b0; let Inst{25-23} = 0b110; @@ -1524,19 +1525,33 @@ class MVE_VMULt1 size, list pattern=[]> let validForTailPredication = 1; } -def MVE_VMULt1i8 : MVE_VMULt1<"i8", 0b00>; -def MVE_VMULt1i16 : MVE_VMULt1<"i16", 0b01>; -def MVE_VMULt1i32 : MVE_VMULt1<"i32", 0b10>; +multiclass MVE_VMUL_m { + def "" : MVE_VMULt1; -let Predicates = [HasMVEInt] in { - def : Pat<(v16i8 (mul (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), - (v16i8 (MVE_VMULt1i8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; - def : Pat<(v8i16 (mul (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), - (v8i16 (MVE_VMULt1i16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; - def : Pat<(v4i32 (mul (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), - (v4i32 (MVE_VMULt1i32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; + let Predicates = [HasMVEInt] in { + // Unpredicated multiply + def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))), + (VTI.Vec (!cast(NAME) + (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; + + // Predicated multiply + def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))), + (VTI.Vec (!cast(NAME) + (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + (i32 1), (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive)))>; + } } +multiclass MVE_VMUL + : MVE_VMUL_m<"vmul", VTI, 
mul, int_arm_mve_mul_predicated>; + +defm MVE_VMULi8 : MVE_VMUL; +defm MVE_VMULi16 : MVE_VMUL; +defm MVE_VMULi32 : MVE_VMUL; + class MVE_VQxDMULH size, bit rounding, list pattern=[]> : MVE_int { @@ -2805,8 +2820,8 @@ class MVEFloatArithNeon pattern=[]> - : MVEFloatArithNeon<"vmul", suffix, size, (outs MQPR:$Qd), +class MVE_VMUL_fp pattern=[]> + : MVEFloatArithNeon { bits<4> Qd; @@ -2824,16 +2839,29 @@ class MVE_VMUL_fp pattern=[]> let validForTailPredication = 1; } -def MVE_VMULf32 : MVE_VMUL_fp<"f32", 0b0>; -def MVE_VMULf16 : MVE_VMUL_fp<"f16", 0b1>; +multiclass MVE_VMULT_fp_m { + def "" : MVE_VMUL_fp; -let Predicates = [HasMVEFloat] in { - def : Pat<(v4f32 (fmul (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))), - (v4f32 (MVE_VMULf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>; - def : Pat<(v8f16 (fmul (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))), - (v8f16 (MVE_VMULf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>; + let Predicates = [HasMVEFloat] in { + def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))), + (VTI.Vec (!cast(NAME) + (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; + def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))), + (VTI.Vec (!cast(NAME) + (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + (i32 1), (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive)))>; + } } +multiclass MVE_VMUL_fp_m + : MVE_VMULT_fp_m<"vmul", 0, VTI, fmul, int_arm_mve_mul_predicated>; + +defm MVE_VMULf32 : MVE_VMUL_fp_m; +defm MVE_VMULf16 : MVE_VMUL_fp_m; + class MVE_VCMLA pattern=[]> : MVEFloatArithNeon<"vcmla", suffix, size, (outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot), diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir index 99f6e39d3712e..33389f4c2941c 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir @@ -211,7 +211,7 @@ body: | ; CHECK: renamable 
$r4 = t2ADDrr renamable $r0, renamable $r12, 14, $noreg, $noreg ; CHECK: renamable $r12 = t2ADDri killed renamable $r12, 16, 14, $noreg, $noreg ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 16, 14, $noreg - ; CHECK: renamable $q0 = MVE_VMULt1i8 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 + ; CHECK: renamable $q0 = MVE_VMULi8 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: MVE_VSTRBU8 killed renamable $q0, killed renamable $r4, 0, 0, killed $noreg :: (store 16 into %ir.scevgep1, align 1) ; CHECK: $lr = MVE_LETP renamable $lr, %bb.2 ; CHECK: bb.3.for.cond.cleanup: @@ -252,7 +252,7 @@ body: | renamable $r4 = t2ADDrr renamable $r0, renamable $r12, 14, $noreg, $noreg renamable $r12 = t2ADDri killed renamable $r12, 16, 14, $noreg, $noreg renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 16, 14, $noreg - renamable $q0 = MVE_VMULt1i8 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 + renamable $q0 = MVE_VMULi8 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr MVE_VSTRBU8 killed renamable $q0, killed renamable $r4, 0, 1, killed renamable $vpr :: (store 16 into %ir.scevgep1, align 1) renamable $lr = t2LoopDec killed renamable $lr, 1 @@ -325,7 +325,7 @@ body: | ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3 ; CHECK: renamable $q0 = MVE_VLDRHU16 renamable $r1, 0, 0, $noreg :: (load 16 from %ir.lsr.iv57, align 2) ; CHECK: renamable $q1 = MVE_VLDRHU16 renamable $r2, 0, 0, $noreg :: (load 16 from %ir.lsr.iv24, align 2) - ; CHECK: renamable $q0 = MVE_VMULt1i16 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 + ; CHECK: renamable $q0 = MVE_VMULi16 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: MVE_VSTRHU16 killed renamable $q0, renamable $r0, 0, 0, killed $noreg :: (store 16 into %ir.lsr.iv1, align 2) ; CHECK: renamable $r1, dead $cpsr = tADDi8 killed 
renamable $r1, 16, 14, $noreg ; CHECK: renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 16, 14, $noreg @@ -358,7 +358,7 @@ body: | MVE_VPST 4, implicit $vpr renamable $q0 = MVE_VLDRHU16 renamable $r1, 0, 1, renamable $vpr :: (load 16 from %ir.lsr.iv57, align 2) renamable $q1 = MVE_VLDRHU16 renamable $r2, 0, 1, renamable $vpr :: (load 16 from %ir.lsr.iv24, align 2) - renamable $q0 = MVE_VMULt1i16 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 + renamable $q0 = MVE_VMULi16 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 MVE_VPST 8, implicit $vpr MVE_VSTRHU16 killed renamable $q0, renamable $r0, 0, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 2) renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 16, 14, $noreg @@ -441,7 +441,7 @@ body: | ; CHECK: renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 0, $noreg :: (load 16 from %ir.lsr.iv24, align 4) ; CHECK: renamable $q2 = MVE_VLDRWU32 renamable $r1, 0, 0, killed $noreg :: (load 16 from %ir.lsr.iv1, align 4) ; CHECK: $r3 = tMOVr $r2, 14, $noreg - ; CHECK: renamable $q1 = nsw MVE_VMULt1i32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 + ; CHECK: renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 ; CHECK: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 16, 14, $noreg ; CHECK: renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 16, 14, $noreg ; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed $r2, 4, 14, $noreg @@ -490,7 +490,7 @@ body: | renamable $q1 = MVE_VLDRWU32 renamable $r0, 0, 1, renamable $vpr :: (load 16 from %ir.lsr.iv24, align 4) renamable $q2 = MVE_VLDRWU32 renamable $r1, 0, 1, killed renamable $vpr :: (load 16 from %ir.lsr.iv1, align 4) $r3 = tMOVr $r2, 14, $noreg - renamable $q1 = nsw MVE_VMULt1i32 killed renamable $q2, killed renamable $q1, 0, $noreg, undef renamable $q1 + renamable $q1 = nsw MVE_VMULi32 killed renamable $q2, 
killed renamable $q1, 0, $noreg, undef renamable $q1 renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 16, 14, $noreg renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 16, 14, $noreg renamable $r2, dead $cpsr = tSUBi8 killed $r2, 4, 14, $noreg diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulq.ll new file mode 100644 index 0000000000000..09d8e11a71aed --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmulq.ll @@ -0,0 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <4 x i32> @test_vmulq_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: test_vmulq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmul.i32 q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = mul <4 x i32> %b, %a + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <4 x float> @test_vmulq_f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: test_vmulq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmul.f32 q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = fmul <4 x float> %b, %a + ret <4 x float> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vmulq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vmulq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmult.i8 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) + +declare <16 x i8> @llvm.arm.mve.mul.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) + +define arm_aapcs_vfpcc <8 x half> @test_vmulq_m_f16(<8 x half> %inactive, <8 x half> %a, 
<8 x half> %b, i16 zeroext %p) { +; CHECK-LABEL: test_vmulq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmult.f16 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half> %a, <8 x half> %b, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) + +declare <8 x half> @llvm.arm.mve.mul.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>, <8 x half>) diff --git a/llvm/unittests/Target/ARM/MachineInstrTest.cpp b/llvm/unittests/Target/ARM/MachineInstrTest.cpp index 8e491d9ef1d67..8ca864aa8c635 100644 --- a/llvm/unittests/Target/ARM/MachineInstrTest.cpp +++ b/llvm/unittests/Target/ARM/MachineInstrTest.cpp @@ -250,9 +250,9 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { case MVE_VMUL_qr_i8: case MVE_VMULf16: case MVE_VMULf32: - case MVE_VMULt1i16: - case MVE_VMULt1i8: - case MVE_VMULt1i32: + case MVE_VMULi16: + case MVE_VMULi8: + case MVE_VMULi32: case MVE_VMVN: case MVE_VMVNimmi16: case MVE_VMVNimmi32: From a048bf87fb652c0fdfd5936965fc72bcef0dfea2 Mon Sep 17 00:00:00 2001 From: Mark Murray Date: Fri, 15 Nov 2019 11:30:15 +0000 Subject: [PATCH 119/591] [ARM][MVE][Intrinsics] Add MVE VAND/VORR/VORN/VEOR/VBIC intrinsics. Add unit tests. Summary: Add MVE VAND/VORR/VORN/VEOR/VBIC intrinsics. Add unit tests. 
Reviewers: simon_tatham, ostannard, dmgreen Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D70547 --- clang/include/clang/Basic/arm_mve.td | 41 +++++++ clang/include/clang/Basic/arm_mve_defs.td | 2 + clang/test/CodeGen/arm-mve-intrinsics/vandq.c | 72 ++++++++++++ clang/test/CodeGen/arm-mve-intrinsics/vbicq.c | 74 ++++++++++++ clang/test/CodeGen/arm-mve-intrinsics/veorq.c | 72 ++++++++++++ clang/test/CodeGen/arm-mve-intrinsics/vornq.c | 74 ++++++++++++ clang/test/CodeGen/arm-mve-intrinsics/vorrq.c | 72 ++++++++++++ llvm/include/llvm/IR/IntrinsicsARM.td | 15 +++ llvm/lib/Target/ARM/ARMInstrMVE.td | 98 ++++++++-------- .../CodeGen/Thumb2/mve-intrinsics/vandq.ll | 104 +++++++++++++++++ .../CodeGen/Thumb2/mve-intrinsics/vbicq.ll | 108 ++++++++++++++++++ .../CodeGen/Thumb2/mve-intrinsics/veorq.ll | 104 +++++++++++++++++ .../CodeGen/Thumb2/mve-intrinsics/vornq.ll | 108 ++++++++++++++++++ .../CodeGen/Thumb2/mve-intrinsics/vorrq.ll | 104 +++++++++++++++++ 14 files changed, 1003 insertions(+), 45 deletions(-) create mode 100644 clang/test/CodeGen/arm-mve-intrinsics/vandq.c create mode 100644 clang/test/CodeGen/arm-mve-intrinsics/vbicq.c create mode 100644 clang/test/CodeGen/arm-mve-intrinsics/veorq.c create mode 100644 clang/test/CodeGen/arm-mve-intrinsics/vornq.c create mode 100644 clang/test/CodeGen/arm-mve-intrinsics/vorrq.c create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vandq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vbicq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/veorq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vornq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vorrq.ll diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 2e47807ecce80..dfd8097f0644f 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -28,9 +28,23 
@@ foreach n = [ 2, 4 ] in { "Intrinsic::arm_mve_vld"#n#"q":$IRIntr)>; } +multiclass bit_op_fp { +def "": Intrinsic; +} + +multiclass bit_op_fp_with_inv { +def "": Intrinsic; +} let params = T.Int in { def vaddq: Intrinsic; +def vandq: Intrinsic; +def vbicq: Intrinsic; +def veorq: Intrinsic; +def vornq: Intrinsic; +def vorrq: Intrinsic; def vsubq: Intrinsic; def vmulq: Intrinsic; } @@ -38,17 +52,39 @@ def vmulq: Intrinsic; let params = T.Float in { def vaddqf: Intrinsic, NameOverride<"vaddq">; +defm vandqf: bit_op_fp, NameOverride<"vandq">; +defm vbicqf: bit_op_fp_with_inv, NameOverride<"vbicq">; +defm veorqf: bit_op_fp, NameOverride<"veorq">; +defm vornqf: bit_op_fp_with_inv, NameOverride<"vornq">; +defm vorrqf: bit_op_fp, NameOverride<"vorrq">; def vsubqf: Intrinsic, NameOverride<"vsubq">; def vmulqf: Intrinsic, NameOverride<"vmulq">; } +// The bitcasting below is not overcomplicating the IR because while +// Vector and UVector may be different vector types at the C level i.e. +// vectors of same size signed/unsigned ints. Once they're lowered +// to IR, they are just bit vectors with no sign at all, so the +// bitcasts will be automatically elided by IRBuilder. 
+multiclass predicated_bit_op_fp { +def "": Intrinsic + (bitcast $a, UVector), + (bitcast $b, UVector), + $pred, + (bitcast $inactive, UVector)), Vector)>; +} + +// Plain intrinsics let params = T.Usual in { def vabdq: Intrinsic $a, $b)>; } +// Predicated intrinsics let params = T.Usual in { def vabdq_m: Intrinsic< Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred), @@ -62,6 +98,11 @@ def vsubq_m: Intrinsic< def vmulq_m: Intrinsic< Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred), (IRInt<"mul_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>; +defm vandq_m: predicated_bit_op_fp<"and_predicated">; +defm vbicq_m: predicated_bit_op_fp<"bic_predicated">; +defm veorq_m: predicated_bit_op_fp<"eor_predicated">; +defm vornq_m: predicated_bit_op_fp<"orn_predicated">; +defm vorrq_m: predicated_bit_op_fp<"orr_predicated">; } let params = T.Int in { diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index bd0459dc3b163..c0ed80d456a5f 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -59,8 +59,10 @@ class CGHelperFn : IRBuilderBase { } def add: IRBuilder<"CreateAdd">; def mul: IRBuilder<"CreateMul">; +def not: IRBuilder<"CreateNot">; def or: IRBuilder<"CreateOr">; def and: IRBuilder<"CreateAnd">; +def xor: IRBuilder<"CreateXor">; def sub: IRBuilder<"CreateSub">; def shl: IRBuilder<"CreateShl">; def lshr: IRBuilder<"CreateLShr">; diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vandq.c b/clang/test/CodeGen/arm-mve-intrinsics/vandq.c new file mode 100644 index 0000000000000..aeab8b7063ece --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vandq.c @@ -0,0 +1,72 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s 
| opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_vandq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vandq_u32(uint32x4_t a, uint32x4_t b) +{ +#ifdef POLYMORPHIC + return vandq(a, b); +#else /* POLYMORPHIC */ + return vandq_u32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vandq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float> +// CHECK-NEXT: ret <4 x float> [[TMP3]] +// +float32x4_t test_vandq_f32(float32x4_t a, float32x4_t b) +{ +#ifdef POLYMORPHIC + return vandq(a, b); +#else /* POLYMORPHIC */ + return vandq_f32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vandq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.and.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vandq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vandq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vandq_m_s8(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vandq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A:%.*]] to <8 x i16> +// 
CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[INACTIVE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP5:%.*]] = call <8 x i16> @llvm.arm.mve.and.predicated.v8i16.v8i1(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i1> [[TMP3]], <8 x i16> [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <8 x half> +// CHECK-NEXT: ret <8 x half> [[TMP6]] +// +float16x8_t test_vandq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vandq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vandq_m_f16(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vbicq.c b/clang/test/CodeGen/arm-mve-intrinsics/vbicq.c new file mode 100644 index 0000000000000..3106b40a322d1 --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vbicq.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_vbicq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i32> [[B:%.*]], +// CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], [[TMP0]] +// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// +uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) +{ +#ifdef POLYMORPHIC + return vbicq(a, b); +#else /* POLYMORPHIC */ + return vbicq_u32(a, b); +#endif /* 
POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vbicq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], +// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[TMP0]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <4 x float> +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float32x4_t test_vbicq_f32(float32x4_t a, float32x4_t b) +{ +#ifdef POLYMORPHIC + return vbicq(a, b); +#else /* POLYMORPHIC */ + return vbicq_f32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vbicq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.bic.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vbicq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vbicq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vbicq_m_s8(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vbicq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[INACTIVE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP5:%.*]] = call <8 x i16> @llvm.arm.mve.bic.predicated.v8i16.v8i1(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i1> [[TMP3]], <8 x i16> [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <8 x half> +// CHECK-NEXT: 
ret <8 x half> [[TMP6]] +// +float16x8_t test_vbicq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vbicq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vbicq_m_f16(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/veorq.c b/clang/test/CodeGen/arm-mve-intrinsics/veorq.c new file mode 100644 index 0000000000000..c271568f791f3 --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/veorq.c @@ -0,0 +1,72 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_veorq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +uint32x4_t test_veorq_u32(uint32x4_t a, uint32x4_t b) +{ +#ifdef POLYMORPHIC + return veorq(a, b); +#else /* POLYMORPHIC */ + return veorq_u32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_veorq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float> +// CHECK-NEXT: ret <4 x float> [[TMP3]] +// +float32x4_t test_veorq_f32(float32x4_t a, float32x4_t b) +{ +#ifdef POLYMORPHIC + return veorq(a, b); +#else /* POLYMORPHIC */ + return veorq_f32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: 
@test_veorq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.eor.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_veorq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return veorq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return veorq_m_s8(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_veorq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[INACTIVE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP5:%.*]] = call <8 x i16> @llvm.arm.mve.eor.predicated.v8i16.v8i1(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i1> [[TMP3]], <8 x i16> [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <8 x half> +// CHECK-NEXT: ret <8 x half> [[TMP6]] +// +float16x8_t test_veorq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return veorq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return veorq_m_f16(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vornq.c b/clang/test/CodeGen/arm-mve-intrinsics/vornq.c new file mode 100644 index 0000000000000..753a6ddf2ee17 --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vornq.c @@ -0,0 +1,74 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi 
-target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_vornq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i32> [[B:%.*]], +// CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> [[A:%.*]], [[TMP0]] +// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// +uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) +{ +#ifdef POLYMORPHIC + return vornq(a, b); +#else /* POLYMORPHIC */ + return vornq_u32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vornq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], +// CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP0]], [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <4 x float> +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float32x4_t test_vornq_f32(float32x4_t a, float32x4_t b) +{ +#ifdef POLYMORPHIC + return vornq(a, b); +#else /* POLYMORPHIC */ + return vornq_f32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vornq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.orn.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vornq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vornq_m(inactive, a, b, p); 
+#else /* POLYMORPHIC */ + return vornq_m_s8(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vornq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[INACTIVE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP5:%.*]] = call <8 x i16> @llvm.arm.mve.orn.predicated.v8i16.v8i1(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i1> [[TMP3]], <8 x i16> [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <8 x half> +// CHECK-NEXT: ret <8 x half> [[TMP6]] +// +float16x8_t test_vornq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vornq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vornq_m_f16(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vorrq.c b/clang/test/CodeGen/arm-mve-intrinsics/vorrq.c new file mode 100644 index 0000000000000..436f6277e073f --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vorrq.c @@ -0,0 +1,72 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_vorrq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: ret <4 x i32> [[TMP0]] 
+// +uint32x4_t test_vorrq_u32(uint32x4_t a, uint32x4_t b) +{ +#ifdef POLYMORPHIC + return vorrq(a, b); +#else /* POLYMORPHIC */ + return vorrq_u32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vorrq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float> +// CHECK-NEXT: ret <4 x float> [[TMP3]] +// +float32x4_t test_vorrq_f32(float32x4_t a, float32x4_t b) +{ +#ifdef POLYMORPHIC + return vorrq(a, b); +#else /* POLYMORPHIC */ + return vorrq_f32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vorrq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.orr.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vorrq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vorrq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vorrq_m_s8(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vorrq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[INACTIVE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP5:%.*]] = call <8 x i16> @llvm.arm.mve.orr.predicated.v8i16.v8i1(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i1> [[TMP3]], 
<8 x i16> [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <8 x half> +// CHECK-NEXT: ret <8 x half> [[TMP6]] +// +float16x8_t test_vorrq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vorrq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vorrq_m_f16(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index ff022fd3435ba..bd61bf13c54d2 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -802,6 +802,21 @@ def int_arm_mve_abd_predicated: Intrinsic<[llvm_anyvector_ty], def int_arm_mve_add_predicated: Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; +def int_arm_mve_and_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>; +def int_arm_mve_bic_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>; +def int_arm_mve_eor_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>; +def int_arm_mve_orn_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>; +def int_arm_mve_orr_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>; def int_arm_mve_sub_predicated: Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 6d4cca5137bd5..df38503458987 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ 
-1233,53 +1233,61 @@ foreach s=["s8", "s16", "s32", "u8", "u16", "u32", "i8", "i16", "i32", "f16", "f (MVE_VAND MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>; } -let Predicates = [HasMVEInt] in { - def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), - (v16i8 (MVE_VAND (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; - def : Pat<(v8i16 (and (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), - (v8i16 (MVE_VAND (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; - def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), - (v4i32 (MVE_VAND (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; - def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))), - (v2i64 (MVE_VAND (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>; - - def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), - (v16i8 (MVE_VORR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; - def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), - (v8i16 (MVE_VORR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; - def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), - (v4i32 (MVE_VORR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; - def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))), - (v2i64 (MVE_VORR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>; - - def : Pat<(v16i8 (xor (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), - (v16i8 (MVE_VEOR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; - def : Pat<(v8i16 (xor (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), - (v8i16 (MVE_VEOR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; - def : Pat<(v4i32 (xor (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), - (v4i32 (MVE_VEOR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; - def : Pat<(v2i64 (xor (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))), - (v2i64 (MVE_VEOR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>; - - def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (vnotq MQPR:$val2))), - (v16i8 (MVE_VBIC (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; - def : Pat<(v8i16 (and (v8i16 MQPR:$val1), (vnotq MQPR:$val2))), - (v8i16 (MVE_VBIC (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; - 
def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (vnotq MQPR:$val2))), - (v4i32 (MVE_VBIC (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; - def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (vnotq MQPR:$val2))), - (v2i64 (MVE_VBIC (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>; - - def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (vnotq MQPR:$val2))), - (v16i8 (MVE_VORN (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; - def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (vnotq MQPR:$val2))), - (v8i16 (MVE_VORN (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; - def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (vnotq MQPR:$val2))), - (v4i32 (MVE_VORN (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; - def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (vnotq MQPR:$val2))), - (v2i64 (MVE_VORN (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>; +multiclass MVE_bit_op { + let Predicates = [HasMVEInt] in { + // Unpredicated operation + def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))), + (VTI.Vec (instruction (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; + // Predicated operation + def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))), + (VTI.Vec (instruction + (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + (i32 1), (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive)))>; + } } +defm : MVE_bit_op; +defm : MVE_bit_op; +defm : MVE_bit_op; +defm : MVE_bit_op; + +defm : MVE_bit_op; +defm : MVE_bit_op; +defm : MVE_bit_op; +defm : MVE_bit_op; + +defm : MVE_bit_op; +defm : MVE_bit_op; +defm : MVE_bit_op; +defm : MVE_bit_op; + +multiclass MVE_bit_op_with_inv { + let Predicates = [HasMVEInt] in { + // Unpredicated operation + def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (vnotq (VTI.Vec MQPR:$Qn)))), + (VTI.Vec (instruction (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; + // Predicated operation + def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))), + (VTI.Vec (instruction + (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + (i32 
1), (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive)))>; + } +} + +defm : MVE_bit_op_with_inv; +defm : MVE_bit_op_with_inv; +defm : MVE_bit_op_with_inv; +defm : MVE_bit_op_with_inv; + +defm : MVE_bit_op_with_inv; +defm : MVE_bit_op_with_inv; +defm : MVE_bit_op_with_inv; +defm : MVE_bit_op_with_inv; + class MVE_bit_cmode cmode, dag inOps> : MVE_p<(outs MQPR:$Qd), inOps, NoItinerary, iname, suffix, "$Qd, $imm", vpred_n, "$Qd = $Qd_src"> { diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vandq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vandq.ll new file mode 100644 index 0000000000000..1b1d498bc378d --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vandq.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @test_vandq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vandq_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vand q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = and <16 x i8> %b, %a + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vandq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vandq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vand q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = and <4 x i32> %b, %a + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vandq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vandq_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vand q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = and <8 x i16> %b, %a + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x float> @test_vandq_f32(<4 x float> %a, <4 x float> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vandq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vand q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = bitcast <4 x float> %a to <4 x i32> + %1 = bitcast <4 x float> %b to 
<4 x i32> + %2 = and <4 x i32> %1, %0 + %3 = bitcast <4 x i32> %2 to <4 x float> + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vandq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vandq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vandt q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.and.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2 + +declare <16 x i8> @llvm.arm.mve.and.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2 + +define arm_aapcs_vfpcc <8 x i16> @test_vandq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vandq_m_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vandt q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.and.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2 + +declare <8 x i16> @llvm.arm.mve.and.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2 + +; Function Attrs: nounwind readnone +define arm_aapcs_vfpcc <8 x half> @test_vandq_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vandq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vandt q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = bitcast <4 x float> %a to <4 x i32> + %1 = bitcast <4 x float> %b to <4 x i32> + %2 = zext i16 
%p to i32 + %3 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2) + %4 = bitcast <4 x float> %inactive to <4 x i32> + %5 = tail call <4 x i32> @llvm.arm.mve.and.predicated.v4i32.v4i1(<4 x i32> %0, <4 x i32> %1, <4 x i1> %3, <4 x i32> %4) + %6 = bitcast <4 x i32> %5 to <8 x half> + ret <8 x half> %6 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2 + +declare <4 x i32> @llvm.arm.mve.and.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2 diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vbicq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vbicq.ll new file mode 100644 index 0000000000000..47877a13cb96e --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vbicq.ll @@ -0,0 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @test_vbicq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vbicq_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vbic q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = xor <16 x i8> %b, + %1 = and <16 x i8> %0, %a + ret <16 x i8> %1 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vbicq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vbicq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vbic q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = xor <4 x i32> %b, + %1 = and <4 x i32> %0, %a + ret <4 x i32> %1 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vbicq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vbicq_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vbic q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = xor <8 x i16> %b, + %1 = and <8 x i16> %0, %a + ret <8 x i16> %1 +} + +define arm_aapcs_vfpcc <4 x float> @test_vbicq_f32(<4 x float> %a, <4 x float> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vbicq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vbic q0, q0, q1 
+; CHECK-NEXT: bx lr +entry: + %0 = bitcast <4 x float> %a to <4 x i32> + %1 = bitcast <4 x float> %b to <4 x i32> + %2 = xor <4 x i32> %1, + %3 = and <4 x i32> %2, %0 + %4 = bitcast <4 x i32> %3 to <4 x float> + ret <4 x float> %4 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vbicq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vbicq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vbict q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.bic.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2 + +declare <16 x i8> @llvm.arm.mve.bic.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2 + +define arm_aapcs_vfpcc <8 x i16> @test_vbicq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vbicq_m_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vbict q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.bic.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2 + +declare <8 x i16> @llvm.arm.mve.bic.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2 + +; Function Attrs: nounwind readnone +define arm_aapcs_vfpcc <8 x half> @test_vbicq_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vbicq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vbict q0, q1, q2 
+; CHECK-NEXT: bx lr +entry: + %0 = bitcast <4 x float> %a to <4 x i32> + %1 = bitcast <4 x float> %b to <4 x i32> + %2 = zext i16 %p to i32 + %3 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2) + %4 = bitcast <4 x float> %inactive to <4 x i32> + %5 = tail call <4 x i32> @llvm.arm.mve.bic.predicated.v4i32.v4i1(<4 x i32> %0, <4 x i32> %1, <4 x i1> %3, <4 x i32> %4) + %6 = bitcast <4 x i32> %5 to <8 x half> + ret <8 x half> %6 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2 + +declare <4 x i32> @llvm.arm.mve.bic.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2 diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/veorq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/veorq.ll new file mode 100644 index 0000000000000..9b66f3656eb27 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/veorq.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @test_veorq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_veorq_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: veor q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = xor <16 x i8> %b, %a + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @test_veorq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_veorq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: veor q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = xor <4 x i32> %b, %a + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_veorq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_veorq_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: veor q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = xor <8 x i16> %b, %a + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x float> @test_veorq_f32(<4 x float> %a, <4 x float> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_veorq_f32: +; 
CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: veor q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = bitcast <4 x float> %a to <4 x i32> + %1 = bitcast <4 x float> %b to <4 x i32> + %2 = xor <4 x i32> %1, %0 + %3 = bitcast <4 x i32> %2 to <4 x float> + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <16 x i8> @test_veorq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_veorq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: veort q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.eor.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2 + +declare <16 x i8> @llvm.arm.mve.eor.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2 + +define arm_aapcs_vfpcc <8 x i16> @test_veorq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_veorq_m_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: veort q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.eor.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2 + +declare <8 x i16> @llvm.arm.mve.eor.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2 + +; Function Attrs: nounwind readnone +define arm_aapcs_vfpcc <8 x half> @test_veorq_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_veorq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst 
+; CHECK-NEXT: veort q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = bitcast <4 x float> %a to <4 x i32> + %1 = bitcast <4 x float> %b to <4 x i32> + %2 = zext i16 %p to i32 + %3 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2) + %4 = bitcast <4 x float> %inactive to <4 x i32> + %5 = tail call <4 x i32> @llvm.arm.mve.eor.predicated.v4i32.v4i1(<4 x i32> %0, <4 x i32> %1, <4 x i1> %3, <4 x i32> %4) + %6 = bitcast <4 x i32> %5 to <8 x half> + ret <8 x half> %6 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2 + +declare <4 x i32> @llvm.arm.mve.eor.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2 diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vornq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vornq.ll new file mode 100644 index 0000000000000..48f6a3cd23ad2 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vornq.ll @@ -0,0 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @test_vornq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vornq_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorn q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = xor <16 x i8> %b, + %1 = or <16 x i8> %0, %a + ret <16 x i8> %1 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vornq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vornq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorn q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = xor <4 x i32> %b, + %1 = or <4 x i32> %0, %a + ret <4 x i32> %1 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vornq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vornq_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorn q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = xor <8 x i16> %b, + %1 = or <8 x i16> %0, %a + ret <8 x i16> %1 +} + +define arm_aapcs_vfpcc <4 x float> 
@test_vornq_f32(<4 x float> %a, <4 x float> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vornq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorn q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = bitcast <4 x float> %a to <4 x i32> + %1 = bitcast <4 x float> %b to <4 x i32> + %2 = xor <4 x i32> %1, + %3 = or <4 x i32> %2, %0 + %4 = bitcast <4 x i32> %3 to <4 x float> + ret <4 x float> %4 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vornq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vornq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vornt q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.orn.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2 + +declare <16 x i8> @llvm.arm.mve.orn.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2 + +define arm_aapcs_vfpcc <8 x i16> @test_vornq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vornq_m_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vornt q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.orn.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2 + +declare <8 x i16> @llvm.arm.mve.orn.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2 + +; Function Attrs: nounwind readnone +define arm_aapcs_vfpcc <8 x half> @test_vornq_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) 
local_unnamed_addr #1 { +; CHECK-LABEL: test_vornq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vornt q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = bitcast <4 x float> %a to <4 x i32> + %1 = bitcast <4 x float> %b to <4 x i32> + %2 = zext i16 %p to i32 + %3 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2) + %4 = bitcast <4 x float> %inactive to <4 x i32> + %5 = tail call <4 x i32> @llvm.arm.mve.orn.predicated.v4i32.v4i1(<4 x i32> %0, <4 x i32> %1, <4 x i1> %3, <4 x i32> %4) + %6 = bitcast <4 x i32> %5 to <8 x half> + ret <8 x half> %6 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2 + +declare <4 x i32> @llvm.arm.mve.orn.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2 diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vorrq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vorrq.ll new file mode 100644 index 0000000000000..ccb511a85e571 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vorrq.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @test_vorrq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vorrq_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorr q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = or <16 x i8> %b, %a + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vorrq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vorrq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorr q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = or <4 x i32> %b, %a + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vorrq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vorrq_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorr q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = or <8 x i16> %b, %a + 
ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x float> @test_vorrq_f32(<4 x float> %a, <4 x float> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vorrq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorr q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = bitcast <4 x float> %a to <4 x i32> + %1 = bitcast <4 x float> %b to <4 x i32> + %2 = or <4 x i32> %1, %0 + %3 = bitcast <4 x i32> %2 to <4 x float> + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vorrq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vorrq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vorrt q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.orr.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2 + +declare <16 x i8> @llvm.arm.mve.orr.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2 + +define arm_aapcs_vfpcc <8 x i16> @test_vorrq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vorrq_m_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vorrt q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.orr.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2 + +declare <8 x i16> @llvm.arm.mve.orr.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2 + +; Function Attrs: nounwind readnone +define arm_aapcs_vfpcc <8 x half> @test_vorrq_m_f32(<4 x float> %inactive, <4 x float> %a, <4 
x float> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vorrq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vorrt q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = bitcast <4 x float> %a to <4 x i32> + %1 = bitcast <4 x float> %b to <4 x i32> + %2 = zext i16 %p to i32 + %3 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2) + %4 = bitcast <4 x float> %inactive to <4 x i32> + %5 = tail call <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32> %0, <4 x i32> %1, <4 x i1> %3, <4 x i32> %4) + %6 = bitcast <4 x i32> %5 to <8 x half> + ret <8 x half> %6 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2 + +declare <4 x i32> @llvm.arm.mve.orr.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2 From 3d9b1128d609323dd4017250be0f6def94495205 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 25 Nov 2019 11:02:56 -0800 Subject: [PATCH 120/591] [ELF][ARM] Add getPCBias() ThunkCreator::getThunk and ThunkCreator::normalizeExistingThunk currently assume that the implicit addends are -8 for ARM and -4 for Thumb. In D70637, ThunkCreator::getThunk will need to take care of the relocation addend explicitly. Add the utility function getPCBias() as a prerequisite so that the getThunk change in D70637 can be more general. 
Reviewed By: peter.smith Differential Revision: https://reviews.llvm.org/D70690 --- lld/ELF/Relocations.cpp | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 1b8dacb36627d..60ea1119aaf6e 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1763,6 +1763,19 @@ static bool isThunkSectionCompatible(InputSection *source, return true; } +static int64_t getPCBias(RelType type) { + if (config->emachine != EM_ARM) + return 0; + switch (type) { + case R_ARM_THM_JUMP19: + case R_ARM_THM_JUMP24: + case R_ARM_THM_CALL: + return 4; + default: + return 8; + } +} + std::pair ThunkCreator::getThunk(InputSection *isec, Relocation &rel, uint64_t src) { std::vector *thunkVec = nullptr; @@ -1779,7 +1792,9 @@ std::pair ThunkCreator::getThunk(InputSection *isec, for (Thunk *t : *thunkVec) if (isThunkSectionCompatible(isec, t->getThunkTargetSym()->section) && t->isCompatibleWith(*isec, rel) && - target->inBranchRange(rel.type, src, t->getThunkTargetSym()->getVA())) + target->inBranchRange(rel.type, src, + t->getThunkTargetSym()->getVA(rel.addend) + + getPCBias(rel.type))) return std::make_pair(t, false); // No existing compatible Thunk in range, create a new one @@ -1794,7 +1809,8 @@ std::pair ThunkCreator::getThunk(InputSection *isec, // relocation back to its original non-Thunk target. bool ThunkCreator::normalizeExistingThunk(Relocation &rel, uint64_t src) { if (Thunk *t = thunks.lookup(rel.sym)) { - if (target->inBranchRange(rel.type, src, rel.sym->getVA())) + if (target->inBranchRange(rel.type, src, + rel.sym->getVA(rel.addend) + getPCBias(rel.type))) return true; rel.sym = &t->destination; if (rel.sym->isInPlt()) From bcd0798c47ca865f95226859893016a17402441e Mon Sep 17 00:00:00 2001 From: Gabor Horvath Date: Wed, 27 Nov 2019 09:08:51 -0800 Subject: [PATCH 121/591] [LifetimeAnalysis] Fix PR44150 References need somewhat special treatment. 
While copying a gsl::Pointer will propagate the points-to set, creating an object from a reference often behaves more like a dereference operation. Differential Revision: https://reviews.llvm.org/D70755 --- clang/lib/Sema/SemaInit.cpp | 33 +++++++++++++++---- .../Sema/warn-lifetime-analysis-nocfg.cpp | 5 +++ 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index 80d7cfed711a8..7421754d95caa 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -6653,6 +6653,7 @@ struct IndirectLocalPathEntry { VarInit, LValToRVal, LifetimeBoundCall, + GslReferenceInit, GslPointerInit } Kind; Expr *E; @@ -6783,12 +6784,24 @@ static bool shouldTrackFirstArgument(const FunctionDecl *FD) { static void handleGslAnnotatedTypes(IndirectLocalPath &Path, Expr *Call, LocalVisitor Visit) { - auto VisitPointerArg = [&](const Decl *D, Expr *Arg) { + auto VisitPointerArg = [&](const Decl *D, Expr *Arg, bool Value) { // We are not interested in the temporary base objects of gsl Pointers: // Temp().ptr; // Here ptr might not dangle. if (isa(Arg->IgnoreImpCasts())) return; - Path.push_back({IndirectLocalPathEntry::GslPointerInit, Arg, D}); + // Once we initialized a value with a reference, it can no longer dangle. + if (!Value) { + for (auto It = Path.rbegin(), End = Path.rend(); It != End; ++It) { + if (It->Kind == IndirectLocalPathEntry::GslReferenceInit) + continue; + if (It->Kind == IndirectLocalPathEntry::GslPointerInit) + return; + break; + } + } + Path.push_back({Value ? 
IndirectLocalPathEntry::GslPointerInit + : IndirectLocalPathEntry::GslReferenceInit, + Arg, D}); if (Arg->isGLValue()) visitLocalsRetainedByReferenceBinding(Path, Arg, RK_ReferenceBinding, Visit, @@ -6802,18 +6815,21 @@ static void handleGslAnnotatedTypes(IndirectLocalPath &Path, Expr *Call, if (auto *MCE = dyn_cast(Call)) { const auto *MD = cast_or_null(MCE->getDirectCallee()); if (MD && shouldTrackImplicitObjectArg(MD)) - VisitPointerArg(MD, MCE->getImplicitObjectArgument()); + VisitPointerArg(MD, MCE->getImplicitObjectArgument(), + !MD->getReturnType()->isReferenceType()); return; } else if (auto *OCE = dyn_cast(Call)) { FunctionDecl *Callee = OCE->getDirectCallee(); if (Callee && Callee->isCXXInstanceMember() && shouldTrackImplicitObjectArg(cast(Callee))) - VisitPointerArg(Callee, OCE->getArg(0)); + VisitPointerArg(Callee, OCE->getArg(0), + !Callee->getReturnType()->isReferenceType()); return; } else if (auto *CE = dyn_cast(Call)) { FunctionDecl *Callee = CE->getDirectCallee(); if (Callee && shouldTrackFirstArgument(Callee)) - VisitPointerArg(Callee, CE->getArg(0)); + VisitPointerArg(Callee, CE->getArg(0), + !Callee->getReturnType()->isReferenceType()); return; } @@ -6821,7 +6837,7 @@ static void handleGslAnnotatedTypes(IndirectLocalPath &Path, Expr *Call, const auto *Ctor = CCE->getConstructor(); const CXXRecordDecl *RD = Ctor->getParent(); if (CCE->getNumArgs() > 0 && RD->hasAttr()) - VisitPointerArg(Ctor->getParamDecl(0), CCE->getArgs()[0]); + VisitPointerArg(Ctor->getParamDecl(0), CCE->getArgs()[0], true); } } @@ -7287,6 +7303,7 @@ static SourceRange nextPathEntryRange(const IndirectLocalPath &Path, unsigned I, case IndirectLocalPathEntry::AddressOf: case IndirectLocalPathEntry::LValToRVal: case IndirectLocalPathEntry::LifetimeBoundCall: + case IndirectLocalPathEntry::GslReferenceInit: case IndirectLocalPathEntry::GslPointerInit: // These exist primarily to mark the path as not permitting or // supporting lifetime extension. 
@@ -7309,7 +7326,8 @@ static bool pathOnlyInitializesGslPointer(IndirectLocalPath &Path) { continue; if (It->Kind == IndirectLocalPathEntry::AddressOf) continue; - return It->Kind == IndirectLocalPathEntry::GslPointerInit; + return It->Kind == IndirectLocalPathEntry::GslPointerInit || + It->Kind == IndirectLocalPathEntry::GslReferenceInit; } return false; } @@ -7532,6 +7550,7 @@ void Sema::checkInitializerLifetime(const InitializedEntity &Entity, case IndirectLocalPathEntry::LifetimeBoundCall: case IndirectLocalPathEntry::GslPointerInit: + case IndirectLocalPathEntry::GslReferenceInit: // FIXME: Consider adding a note for these. break; diff --git a/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp b/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp index 8ba7686944468..3319d5aa2db8c 100644 --- a/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp +++ b/clang/test/Sema/warn-lifetime-analysis-nocfg.cpp @@ -450,3 +450,8 @@ MyIntPointer handleDerivedToBaseCast1(MySpecialIntPointer ptr) { MyIntPointer handleDerivedToBaseCast2(MyOwnerIntPointer ptr) { return ptr; // expected-warning {{address of stack memory associated with parameter 'ptr' returned}} } + +std::vector::iterator noFalsePositiveWithVectorOfPointers() { + std::vector::iterator> iters; + return iters.at(0); +} From 0d3d4d3b0fc57e577a8f80261bd4390c6cb7c040 Mon Sep 17 00:00:00 2001 From: Kostya Kortchinsky Date: Mon, 25 Nov 2019 10:28:57 -0800 Subject: [PATCH 122/591] [scudo][standalone] Make tests work on Fuchsia Summary: This CL makes unit tests compatible with Fuchsia's zxtest. This required a few changes here and there, but also unearthed some incompatibilities that had to be addressed. A header is introduced to allow to account for the zxtest/gtest differences, some `#if SCUDO_FUCHSIA` are used to disable incompatible code (the 32-bit primary, or the exclusive TSD). 
It also brought to my attention that I was using `__scudo_default_options` in different tests, which ended up in a single binary, and I am not sure how that ever worked. So move this to the main cpp. Additionally fully disable the secondary freelist on Fuchsia as we do not track VMOs for secondary allocations, so no release possible. With some modifications to Scudo's BUILD.gn in Fuchsia: ``` [==========] 79 tests from 23 test cases ran (10280 ms total). [ PASSED ] 79 tests ``` Reviewers: mcgrathr, phosek, hctim, pcc, eugenis, cferris Subscribers: srhines, jfb, #sanitizers, llvm-commits Tags: #sanitizers, #llvm Differential Revision: https://reviews.llvm.org/D70682 --- .../lib/scudo/standalone/allocator_config.h | 2 +- compiler-rt/lib/scudo/standalone/secondary.h | 12 +++-- .../scudo/standalone/tests/atomic_test.cpp | 5 ++- .../scudo/standalone/tests/bytemap_test.cpp | 5 ++- .../scudo/standalone/tests/checksum_test.cpp | 4 +- .../lib/scudo/standalone/tests/chunk_test.cpp | 4 +- .../scudo/standalone/tests/combined_test.cpp | 45 ++++++++----------- .../lib/scudo/standalone/tests/flags_test.cpp | 4 +- .../lib/scudo/standalone/tests/list_test.cpp | 5 ++- .../lib/scudo/standalone/tests/map_test.cpp | 9 ++-- .../lib/scudo/standalone/tests/mutex_test.cpp | 5 ++- .../scudo/standalone/tests/primary_test.cpp | 15 +++++-- .../standalone/tests/quarantine_test.cpp | 5 ++- .../scudo/standalone/tests/release_test.cpp | 5 ++- .../scudo/standalone/tests/report_test.cpp | 8 ++-- .../scudo/standalone/tests/scudo_unit_test.h | 29 ++++++++++++ .../standalone/tests/scudo_unit_test_main.cpp | 18 +++++++- .../scudo/standalone/tests/secondary_test.cpp | 13 ++++-- .../standalone/tests/size_class_map_test.cpp | 5 ++- .../lib/scudo/standalone/tests/stats_test.cpp | 5 ++- .../scudo/standalone/tests/strings_test.cpp | 5 ++- .../lib/scudo/standalone/tests/tsd_test.cpp | 8 +++- .../scudo/standalone/tests/vector_test.cpp | 4 +- .../standalone/tests/wrappers_c_test.cpp | 18 ++++---- 
.../standalone/tests/wrappers_cpp_test.cpp | 9 +--- 25 files changed, 157 insertions(+), 90 deletions(-) create mode 100644 compiler-rt/lib/scudo/standalone/tests/scudo_unit_test.h diff --git a/compiler-rt/lib/scudo/standalone/allocator_config.h b/compiler-rt/lib/scudo/standalone/allocator_config.h index 166e19e2b8f28..1d00a5d76d04d 100644 --- a/compiler-rt/lib/scudo/standalone/allocator_config.h +++ b/compiler-rt/lib/scudo/standalone/allocator_config.h @@ -67,7 +67,7 @@ struct AndroidSvelteConfig { struct FuchsiaConfig { // 1GB Regions typedef SizeClassAllocator64 Primary; - typedef MapAllocator<> Secondary; + typedef MapAllocator<0U> Secondary; template using TSDRegistryT = TSDRegistrySharedT; // Shared, max 8 TSDs. }; diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h index f288fc7d7592b..d44d2aeaf686a 100644 --- a/compiler-rt/lib/scudo/standalone/secondary.h +++ b/compiler-rt/lib/scudo/standalone/secondary.h @@ -50,6 +50,10 @@ static Header *getHeader(const void *Ptr) { template class MapAllocator { public: + // Ensure the freelist is disabled on Fuchsia, since it doesn't support + // releasing Secondary blocks yet. 
+ COMPILER_CHECK(!SCUDO_FUCHSIA || MaxFreeListSize == 0U); + void initLinkerInitialized(GlobalStats *S) { Stats.initLinkerInitialized(); if (LIKELY(S)) @@ -205,10 +209,11 @@ void *MapAllocator::allocate(uptr Size, uptr AlignmentHint, template void MapAllocator::deallocate(void *Ptr) { LargeBlock::Header *H = LargeBlock::getHeader(Ptr); + const uptr Block = reinterpret_cast(H); { ScopedLock L(Mutex); InUseBlocks.remove(H); - const uptr CommitSize = H->BlockEnd - reinterpret_cast(H); + const uptr CommitSize = H->BlockEnd - Block; FreedBytes += CommitSize; NumberOfFrees++; Stats.sub(StatAllocated, CommitSize); @@ -225,11 +230,10 @@ void MapAllocator::deallocate(void *Ptr) { if (!Inserted) FreeBlocks.push_back(H); const uptr RoundedAllocationStart = - roundUpTo(reinterpret_cast(H) + LargeBlock::getHeaderSize(), - getPageSizeCached()); + roundUpTo(Block + LargeBlock::getHeaderSize(), getPageSizeCached()); MapPlatformData Data = H->Data; // TODO(kostyak): use release_to_os_interval_ms - releasePagesToOS(H->MapBase, RoundedAllocationStart - H->MapBase, + releasePagesToOS(Block, RoundedAllocationStart - Block, H->BlockEnd - RoundedAllocationStart, &Data); return; } diff --git a/compiler-rt/lib/scudo/standalone/tests/atomic_test.cpp b/compiler-rt/lib/scudo/standalone/tests/atomic_test.cpp index 7e6f1d21f6e9c..103cd24624ba5 100644 --- a/compiler-rt/lib/scudo/standalone/tests/atomic_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/atomic_test.cpp @@ -6,8 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "scudo/standalone/atomic_helpers.h" -#include "gtest/gtest.h" +#include "tests/scudo_unit_test.h" + +#include "atomic_helpers.h" namespace scudo { diff --git a/compiler-rt/lib/scudo/standalone/tests/bytemap_test.cpp b/compiler-rt/lib/scudo/standalone/tests/bytemap_test.cpp index df0646bcd99d0..7db7feb6accdc 100644 --- a/compiler-rt/lib/scudo/standalone/tests/bytemap_test.cpp +++ 
b/compiler-rt/lib/scudo/standalone/tests/bytemap_test.cpp @@ -6,10 +6,11 @@ // //===----------------------------------------------------------------------===// -#include "bytemap.h" +#include "tests/scudo_unit_test.h" -#include "gtest/gtest.h" +#include "bytemap.h" +#include #include template void testMap(T &Map, scudo::uptr Size) { diff --git a/compiler-rt/lib/scudo/standalone/tests/checksum_test.cpp b/compiler-rt/lib/scudo/standalone/tests/checksum_test.cpp index 43bbd47a3c35a..361d33c7e4641 100644 --- a/compiler-rt/lib/scudo/standalone/tests/checksum_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/checksum_test.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "checksum.h" +#include "tests/scudo_unit_test.h" -#include "gtest/gtest.h" +#include "checksum.h" #include diff --git a/compiler-rt/lib/scudo/standalone/tests/chunk_test.cpp b/compiler-rt/lib/scudo/standalone/tests/chunk_test.cpp index 57e128ec82666..13da70eff85b8 100644 --- a/compiler-rt/lib/scudo/standalone/tests/chunk_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/chunk_test.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "chunk.h" +#include "tests/scudo_unit_test.h" -#include "gtest/gtest.h" +#include "chunk.h" #include diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp index 9205467998ed1..849fa713ad1d2 100644 --- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp @@ -6,14 +6,15 @@ // //===----------------------------------------------------------------------===// +#include "tests/scudo_unit_test.h" + #include "allocator_config.h" #include "combined.h" -#include "gtest/gtest.h" - #include #include #include +#include static std::mutex Mutex; static std::condition_variable Cv; @@ -21,17 +22,6 @@ static bool Ready = 
false; static constexpr scudo::Chunk::Origin Origin = scudo::Chunk::Origin::Malloc; -// This allows us to turn on the Quarantine for specific tests. The Quarantine -// parameters are on the low end, to avoid having to loop excessively in some -// tests. -static bool UseQuarantine = false; -extern "C" const char *__scudo_default_options() { - if (!UseQuarantine) - return ""; - return "quarantine_size_kb=256:thread_local_quarantine_size_kb=128:" - "quarantine_max_chunk_size=1024"; -} - template static void testAllocator() { using AllocatorT = scudo::Allocator; auto Deleter = [](AllocatorT *A) { @@ -168,15 +158,15 @@ template static void testAllocator() { } TEST(ScudoCombinedTest, BasicCombined) { - testAllocator(); -#if SCUDO_WORDSIZE == 64U + UseQuarantine = false; + testAllocator(); +#if SCUDO_FUCHSIA testAllocator(); -#endif - // The following configs should work on all platforms. +#else + testAllocator(); UseQuarantine = true; testAllocator(); - UseQuarantine = false; - testAllocator(); +#endif } template static void stressAllocator(AllocatorT *A) { @@ -223,20 +213,21 @@ template static void testAllocatorThreaded() { } TEST(ScudoCombinedTest, ThreadedCombined) { - testAllocatorThreaded(); -#if SCUDO_WORDSIZE == 64U + UseQuarantine = false; + testAllocatorThreaded(); +#if SCUDO_FUCHSIA testAllocatorThreaded(); -#endif +#else + testAllocatorThreaded(); UseQuarantine = true; testAllocatorThreaded(); - UseQuarantine = false; - testAllocatorThreaded(); +#endif } struct DeathConfig { // Tiny allocator, its Primary only serves chunks of 1024 bytes. using DeathSizeClassMap = scudo::SizeClassMap<1U, 10U, 10U, 10U, 1U, 10U>; - typedef scudo::SizeClassAllocator32 Primary; + typedef scudo::SizeClassAllocator64 Primary; typedef scudo::MapAllocator<0U> Secondary; template using TSDRegistryT = scudo::TSDRegistrySharedT; }; @@ -258,8 +249,8 @@ TEST(ScudoCombinedTest, DeathCombined) { // Invalid sized deallocation. 
EXPECT_DEATH(Allocator->deallocate(P, Origin, Size + 8U), ""); - // Misaligned pointer. - void *MisalignedP = + // Misaligned pointer. Potentially unused if EXPECT_DEATH isn't available. + UNUSED void *MisalignedP = reinterpret_cast(reinterpret_cast(P) | 1U); EXPECT_DEATH(Allocator->deallocate(MisalignedP, Origin, Size), ""); EXPECT_DEATH(Allocator->reallocate(MisalignedP, Size * 2U), ""); diff --git a/compiler-rt/lib/scudo/standalone/tests/flags_test.cpp b/compiler-rt/lib/scudo/standalone/tests/flags_test.cpp index 1c07bf13181c2..45918ad4d2ca0 100644 --- a/compiler-rt/lib/scudo/standalone/tests/flags_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/flags_test.cpp @@ -6,11 +6,11 @@ // //===----------------------------------------------------------------------===// +#include "tests/scudo_unit_test.h" + #include "flags.h" #include "flags_parser.h" -#include "gtest/gtest.h" - #include static const char FlagName[] = "flag_name"; diff --git a/compiler-rt/lib/scudo/standalone/tests/list_test.cpp b/compiler-rt/lib/scudo/standalone/tests/list_test.cpp index 0a0c050c98cd5..8e139916d0588 100644 --- a/compiler-rt/lib/scudo/standalone/tests/list_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/list_test.cpp @@ -6,8 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "scudo/standalone/list.h" -#include "gtest/gtest.h" +#include "tests/scudo_unit_test.h" + +#include "list.h" struct ListItem { ListItem *Next; diff --git a/compiler-rt/lib/scudo/standalone/tests/map_test.cpp b/compiler-rt/lib/scudo/standalone/tests/map_test.cpp index ab5dd8ca5fd6a..7c40b73ff2544 100644 --- a/compiler-rt/lib/scudo/standalone/tests/map_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/map_test.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "common.h" +#include "tests/scudo_unit_test.h" -#include "gtest/gtest.h" +#include "common.h" #include #include @@ -31,11 +31,10 @@ 
TEST(ScudoMapTest, MapNoAccessUnmap) { TEST(ScudoMapTest, MapUnmap) { const scudo::uptr Size = 4 * scudo::getPageSizeCached(); - scudo::MapPlatformData Data = {}; - void *P = scudo::map(nullptr, Size, MappingName, 0, &Data); + void *P = scudo::map(nullptr, Size, MappingName, 0, nullptr); EXPECT_NE(P, nullptr); memset(P, 0xaa, Size); - scudo::unmap(P, Size, 0, &Data); + scudo::unmap(P, Size, 0, nullptr); EXPECT_DEATH(memset(P, 0xbb, Size), ""); } diff --git a/compiler-rt/lib/scudo/standalone/tests/mutex_test.cpp b/compiler-rt/lib/scudo/standalone/tests/mutex_test.cpp index c75ef8edb3666..ce715a19332f4 100644 --- a/compiler-rt/lib/scudo/standalone/tests/mutex_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/mutex_test.cpp @@ -6,10 +6,11 @@ // //===----------------------------------------------------------------------===// -#include "mutex.h" +#include "tests/scudo_unit_test.h" -#include "gtest/gtest.h" +#include "mutex.h" +#include #include class TestData { diff --git a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp index 7da7b25ca67ed..64b625e79bf2d 100644 --- a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp @@ -6,15 +6,16 @@ // //===----------------------------------------------------------------------===// +#include "tests/scudo_unit_test.h" + #include "primary32.h" #include "primary64.h" #include "size_class_map.h" -#include "gtest/gtest.h" - #include #include #include +#include // Note that with small enough regions, the SizeClassAllocator64 also works on // 32-bit architectures. 
It's not something we want to encourage, but we still @@ -53,7 +54,9 @@ template static void testPrimary() { TEST(ScudoPrimaryTest, BasicPrimary) { using SizeClassMap = scudo::DefaultSizeClassMap; +#if !SCUDO_FUCHSIA testPrimary>(); +#endif testPrimary>(); } @@ -78,7 +81,7 @@ TEST(ScudoPrimaryTest, Primary64OOM) { AllocationFailed = true; break; } - for (scudo::uptr J = 0; J < B->getCount(); J++) + for (scudo::u32 J = 0; J < B->getCount(); J++) memset(B->get(J), 'B', Size); Batches.push_back(B); } @@ -136,7 +139,9 @@ template static void testIteratePrimary() { TEST(ScudoPrimaryTest, PrimaryIterate) { using SizeClassMap = scudo::DefaultSizeClassMap; +#if !SCUDO_FUCHSIA testIteratePrimary>(); +#endif testIteratePrimary>(); } @@ -193,7 +198,9 @@ template static void testPrimaryThreaded() { TEST(ScudoPrimaryTest, PrimaryThreaded) { using SizeClassMap = scudo::SvelteSizeClassMap; +#if !SCUDO_FUCHSIA testPrimaryThreaded>(); +#endif testPrimaryThreaded>(); } @@ -221,6 +228,8 @@ template static void testReleaseToOS() { TEST(ScudoPrimaryTest, ReleaseToOS) { using SizeClassMap = scudo::DefaultSizeClassMap; +#if !SCUDO_FUCHSIA testReleaseToOS>(); +#endif testReleaseToOS>(); } diff --git a/compiler-rt/lib/scudo/standalone/tests/quarantine_test.cpp b/compiler-rt/lib/scudo/standalone/tests/quarantine_test.cpp index 28baf8feb653f..0422c2ff3736b 100644 --- a/compiler-rt/lib/scudo/standalone/tests/quarantine_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/quarantine_test.cpp @@ -6,10 +6,11 @@ // //===----------------------------------------------------------------------===// -#include "quarantine.h" +#include "tests/scudo_unit_test.h" -#include "gtest/gtest.h" +#include "quarantine.h" +#include #include static void *FakePtr = reinterpret_cast(0xFA83FA83); diff --git a/compiler-rt/lib/scudo/standalone/tests/release_test.cpp b/compiler-rt/lib/scudo/standalone/tests/release_test.cpp index 3776768e9a848..22d73d09d53d7 100644 --- 
a/compiler-rt/lib/scudo/standalone/tests/release_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/release_test.cpp @@ -6,16 +6,17 @@ // //===----------------------------------------------------------------------===// +#include "tests/scudo_unit_test.h" + #include "list.h" #include "release.h" #include "size_class_map.h" -#include "gtest/gtest.h" - #include #include #include +#include TEST(ScudoReleaseTest, PackedCounterArray) { for (scudo::uptr I = 0; I < SCUDO_WORDSIZE; I++) { diff --git a/compiler-rt/lib/scudo/standalone/tests/report_test.cpp b/compiler-rt/lib/scudo/standalone/tests/report_test.cpp index c2f377d968491..09f03f1ac896d 100644 --- a/compiler-rt/lib/scudo/standalone/tests/report_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/report_test.cpp @@ -6,11 +6,13 @@ // //===----------------------------------------------------------------------===// -#include "scudo/standalone/report.h" -#include "gtest/gtest.h" +#include "tests/scudo_unit_test.h" + +#include "report.h" TEST(ScudoReportTest, Generic) { - void *P = reinterpret_cast(0x42424242U); + // Potentially unused if EXPECT_DEATH isn't defined. + UNUSED void *P = reinterpret_cast(0x42424242U); EXPECT_DEATH(scudo::reportError("TEST123"), "Scudo ERROR.*TEST123"); EXPECT_DEATH(scudo::reportInvalidFlag("ABC", "DEF"), "Scudo ERROR.*ABC.*DEF"); EXPECT_DEATH(scudo::reportHeaderCorruption(P), "Scudo ERROR.*42424242"); diff --git a/compiler-rt/lib/scudo/standalone/tests/scudo_unit_test.h b/compiler-rt/lib/scudo/standalone/tests/scudo_unit_test.h new file mode 100644 index 0000000000000..55d039ef77c37 --- /dev/null +++ b/compiler-rt/lib/scudo/standalone/tests/scudo_unit_test.h @@ -0,0 +1,29 @@ +//===-- scudo_unit_test.h ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "platform.h" + +#if SCUDO_FUCHSIA +#include +#else +#include "gtest/gtest.h" +#endif + +// If EXPECT_DEATH isn't defined, make it a no-op. +#ifndef EXPECT_DEATH +#define EXPECT_DEATH(X, Y) \ + do { \ + } while (0) +#endif + +// If EXPECT_STREQ isn't defined, define our own simple one. +#ifndef EXPECT_STREQ +#define EXPECT_STREQ(X, Y) EXPECT_EQ(strcmp(X, Y), 0) +#endif + +extern bool UseQuarantine; diff --git a/compiler-rt/lib/scudo/standalone/tests/scudo_unit_test_main.cpp b/compiler-rt/lib/scudo/standalone/tests/scudo_unit_test_main.cpp index 60bd5648eef71..e771924354edf 100644 --- a/compiler-rt/lib/scudo/standalone/tests/scudo_unit_test_main.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/scudo_unit_test_main.cpp @@ -6,9 +6,25 @@ // //===----------------------------------------------------------------------===// -#include "gtest/gtest.h" +#include "tests/scudo_unit_test.h" + +// This allows us to turn on/off a Quarantine for specific tests. The Quarantine +// parameters are on the low end, to avoid having to loop excessively in some +// tests. 
+bool UseQuarantine = true; +extern "C" __attribute__((visibility("default"))) const char * +__scudo_default_options() { + if (!UseQuarantine) + return "dealloc_type_mismatch=true"; + return "quarantine_size_kb=256:thread_local_quarantine_size_kb=128:" + "quarantine_max_chunk_size=512:dealloc_type_mismatch=true"; +} int main(int argc, char **argv) { +#if !SCUDO_FUCHSIA testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); +#else + return RUN_ALL_TESTS(argc, argv); +#endif } diff --git a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp index 047a61653cb2b..1e7dcec5861fe 100644 --- a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "secondary.h" +#include "tests/scudo_unit_test.h" -#include "gtest/gtest.h" +#include "secondary.h" #include @@ -16,6 +16,7 @@ #include #include #include +#include template static void testSecondaryBasic(void) { scudo::GlobalStats S; @@ -54,12 +55,18 @@ template static void testSecondaryBasic(void) { } TEST(ScudoSecondaryTest, SecondaryBasic) { - testSecondaryBasic>(); testSecondaryBasic>(); +#if !SCUDO_FUCHSIA + testSecondaryBasic>(); testSecondaryBasic>(); +#endif } +#if SCUDO_FUCHSIA +using LargeAllocator = scudo::MapAllocator<0U>; +#else using LargeAllocator = scudo::MapAllocator<>; +#endif // This exercises a variety of combinations of size and alignment for the // MapAllocator. 
The size computation done here mimic the ones done by the diff --git a/compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp b/compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp index 39babc14902e4..55850400a7650 100644 --- a/compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp @@ -6,8 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "scudo/standalone/size_class_map.h" -#include "gtest/gtest.h" +#include "tests/scudo_unit_test.h" + +#include "size_class_map.h" template void testSizeClassMap() { typedef SizeClassMap SCMap; diff --git a/compiler-rt/lib/scudo/standalone/tests/stats_test.cpp b/compiler-rt/lib/scudo/standalone/tests/stats_test.cpp index 449c1491d5558..cdadfbad3cbc2 100644 --- a/compiler-rt/lib/scudo/standalone/tests/stats_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/stats_test.cpp @@ -6,8 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "scudo/standalone/stats.h" -#include "gtest/gtest.h" +#include "tests/scudo_unit_test.h" + +#include "stats.h" TEST(ScudoStatsTest, LocalStats) { scudo::LocalStats LStats; diff --git a/compiler-rt/lib/scudo/standalone/tests/strings_test.cpp b/compiler-rt/lib/scudo/standalone/tests/strings_test.cpp index 3b1a5e8743e60..eed174dc586a4 100644 --- a/compiler-rt/lib/scudo/standalone/tests/strings_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/strings_test.cpp @@ -6,8 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "scudo/standalone/string_utils.h" -#include "gtest/gtest.h" +#include "tests/scudo_unit_test.h" + +#include "string_utils.h" #include diff --git a/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp b/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp index 1941723d5d04f..b32c62fe6ca16 100644 --- 
a/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp @@ -6,11 +6,11 @@ // //===----------------------------------------------------------------------===// +#include "tests/scudo_unit_test.h" + #include "tsd_exclusive.h" #include "tsd_shared.h" -#include "gtest/gtest.h" - #include #include #include @@ -108,7 +108,9 @@ template static void testRegistry() { TEST(ScudoTSDTest, TSDRegistryBasic) { testRegistry>(); testRegistry>(); +#if !SCUDO_FUCHSIA testRegistry>(); +#endif } static std::mutex Mutex; @@ -164,5 +166,7 @@ template static void testRegistryThreaded() { TEST(ScudoTSDTest, TSDRegistryThreaded) { testRegistryThreaded>(); testRegistryThreaded>(); +#if !SCUDO_FUCHSIA testRegistryThreaded>(); +#endif } diff --git a/compiler-rt/lib/scudo/standalone/tests/vector_test.cpp b/compiler-rt/lib/scudo/standalone/tests/vector_test.cpp index 946a44eee8e50..d2c6a9b6bb3cc 100644 --- a/compiler-rt/lib/scudo/standalone/tests/vector_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/vector_test.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "vector.h" +#include "tests/scudo_unit_test.h" -#include "gtest/gtest.h" +#include "vector.h" TEST(ScudoVectorTest, Basic) { scudo::Vector V; diff --git a/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp b/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp index cb651f265f027..99e7aa2fa21cd 100644 --- a/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp @@ -6,10 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "platform.h" - -#include "gtest/gtest.h" +#include "tests/scudo_unit_test.h" +#include #include #include #include @@ -32,11 +31,6 @@ int malloc_iterate(uintptr_t base, size_t size, // We have to use a small quarantine to make sure that our double-free tests // 
trigger. Otherwise EXPECT_DEATH ends up reallocating the chunk that was just // freed (this depends on the size obviously) and the following free succeeds. -extern "C" __attribute__((visibility("default"))) const char * -__scudo_default_options() { - return "quarantine_size_kb=256:thread_local_quarantine_size_kb=128:" - "quarantine_max_chunk_size=512"; -} static const size_t Size = 100U; @@ -200,6 +194,7 @@ TEST(ScudoWrappersCTest, Realloc) { #define M_PURGE -101 #endif +#if !SCUDO_FUCHSIA TEST(ScudoWrappersCTest, MallOpt) { errno = 0; EXPECT_EQ(mallopt(-1000, 1), 0); @@ -213,8 +208,10 @@ TEST(ScudoWrappersCTest, MallOpt) { EXPECT_EQ(mallopt(M_DECAY_TIME, 1), 1); EXPECT_EQ(mallopt(M_DECAY_TIME, 0), 1); } +#endif TEST(ScudoWrappersCTest, OtherAlloc) { +#if !SCUDO_FUCHSIA const size_t PageSize = sysconf(_SC_PAGESIZE); void *P = pvalloc(Size); @@ -229,10 +226,12 @@ TEST(ScudoWrappersCTest, OtherAlloc) { EXPECT_NE(P, nullptr); EXPECT_EQ(reinterpret_cast(P) & (PageSize - 1), 0U); free(P); +#endif EXPECT_EQ(valloc(SIZE_MAX), nullptr); } +#if !SCUDO_FUCHSIA TEST(ScudoWrappersCTest, MallInfo) { const size_t BypassQuarantineSize = 1024U; @@ -248,6 +247,7 @@ TEST(ScudoWrappersCTest, MallInfo) { MI = mallinfo(); EXPECT_GE(static_cast(MI.fordblks), Free + BypassQuarantineSize); } +#endif static uintptr_t BoundaryP; static size_t Count; @@ -282,6 +282,7 @@ TEST(ScudoWrappersCTest, MallocIterateBoundary) { free(P); } +#if !SCUDO_FUCHSIA TEST(ScudoWrappersCTest, MallocInfo) { char Buffer[64]; FILE *F = fmemopen(Buffer, sizeof(Buffer), "w+"); @@ -292,3 +293,4 @@ TEST(ScudoWrappersCTest, MallocInfo) { fclose(F); EXPECT_EQ(strncmp(Buffer, " #include #include +#include void operator delete(void *, size_t) noexcept; void operator delete[](void *, size_t) noexcept; @@ -18,12 +19,6 @@ void operator delete[](void *, size_t) noexcept; // Note that every Cxx allocation function in the test binary will be fulfilled // by Scudo. See the comment in the C counterpart of this file. 
-extern "C" __attribute__((visibility("default"))) const char * -__scudo_default_options() { - return "quarantine_size_kb=256:thread_local_quarantine_size_kb=128:" - "quarantine_max_chunk_size=512:dealloc_type_mismatch=true"; -} - template static void testCxxNew() { T *P = new T; EXPECT_NE(P, nullptr); From 2045d2c90e240bf618d4e10f78f38dedc6db9357 Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Tue, 26 Nov 2019 10:24:38 +0100 Subject: [PATCH 123/591] Make memory dump same as the one in asan. Shadow memory (and short granules) are not prepended with memory address and arrow at the end of line is removed. Differential Revision: https://reviews.llvm.org/D70707 --- compiler-rt/lib/hwasan/hwasan_report.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/hwasan/hwasan_report.cpp b/compiler-rt/lib/hwasan/hwasan_report.cpp index 606139f2e1787..5df8c0ac91063 100644 --- a/compiler-rt/lib/hwasan/hwasan_report.cpp +++ b/compiler-rt/lib/hwasan/hwasan_report.cpp @@ -371,12 +371,13 @@ static void PrintTagInfoAroundAddr(tag_t *tag_ptr, uptr num_rows, InternalScopedString s(GetPageSizeCached() * 8); for (tag_t *row = beg_row; row < end_row; row += row_len) { s.append("%s", row == center_row_beg ? "=>" : " "); + s.append("%p:", row); for (uptr i = 0; i < row_len; i++) { s.append("%s", row + i == tag_ptr ? "[" : " "); print_tag(s, &row[i]); s.append("%s", row + i == tag_ptr ? "]" : " "); } - s.append("%s\n", row == center_row_beg ? "<=" : " "); + s.append("\n"); } Printf("%s", s.data()); } From f30fe16d4902617a33dac1ebca066a7cb7e0f2ec Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Tue, 26 Nov 2019 19:17:10 -0800 Subject: [PATCH 124/591] scudo: Call setCurrentTSD(nullptr) when bringing down the TSD registry in tests. Otherwise, we will hit a use-after-free when testing multiple instances of the same allocator on the same thread. 
This only recently became a problem with D70552 which caused us to run both ScudoCombinedTest.BasicCombined and ScudoCombinedTest.ReleaseToOS on the unit tests' main thread. Differential Revision: https://reviews.llvm.org/D70760 --- compiler-rt/lib/scudo/standalone/tsd_shared.h | 1 + 1 file changed, 1 insertion(+) diff --git a/compiler-rt/lib/scudo/standalone/tsd_shared.h b/compiler-rt/lib/scudo/standalone/tsd_shared.h index a43cf3fc33769..5f58068edf781 100644 --- a/compiler-rt/lib/scudo/standalone/tsd_shared.h +++ b/compiler-rt/lib/scudo/standalone/tsd_shared.h @@ -50,6 +50,7 @@ template struct TSDRegistrySharedT { void unmapTestOnly() { unmap(reinterpret_cast(TSDs), sizeof(TSD) * NumberOfTSDs); + setCurrentTSD(nullptr); } ALWAYS_INLINE void initThreadMaybe(Allocator *Instance, From 6fd6cfdf72f9236520ac642e74cdb93d87532955 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Wed, 27 Nov 2019 09:35:47 -0800 Subject: [PATCH 125/591] scudo: Replace a couple of macros with their expansions. The macros INLINE and COMPILER_CHECK always expand to the same thing (inline and static_assert respectively). Both expansions are standards compliant C++ and are used consistently in the rest of LLVM, so let's improve consistency with the rest of LLVM by replacing them with the expansions. 
Differential Revision: https://reviews.llvm.org/D70793 --- .../lib/scudo/standalone/atomic_helpers.h | 34 +++++++++---------- compiler-rt/lib/scudo/standalone/checksum.h | 2 +- compiler-rt/lib/scudo/standalone/chunk.h | 18 +++++----- compiler-rt/lib/scudo/standalone/combined.h | 6 ++-- compiler-rt/lib/scudo/standalone/common.h | 30 ++++++++-------- .../lib/scudo/standalone/flags_parser.cpp | 2 +- compiler-rt/lib/scudo/standalone/fuchsia.cpp | 4 +-- .../lib/scudo/standalone/internal_defs.h | 3 -- compiler-rt/lib/scudo/standalone/primary32.h | 4 +-- compiler-rt/lib/scudo/standalone/primary64.h | 2 +- compiler-rt/lib/scudo/standalone/quarantine.h | 2 +- compiler-rt/lib/scudo/standalone/report.cpp | 2 +- compiler-rt/lib/scudo/standalone/secondary.h | 2 +- .../lib/scudo/standalone/size_class_map.h | 2 +- compiler-rt/lib/scudo/standalone/tsd.h | 8 ++--- .../lib/scudo/standalone/wrappers_c_checks.h | 10 +++--- 16 files changed, 64 insertions(+), 67 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/atomic_helpers.h b/compiler-rt/lib/scudo/standalone/atomic_helpers.h index 47037d764e252..6c84ba86ed329 100644 --- a/compiler-rt/lib/scudo/standalone/atomic_helpers.h +++ b/compiler-rt/lib/scudo/standalone/atomic_helpers.h @@ -21,12 +21,12 @@ enum memory_order { memory_order_acq_rel = 4, memory_order_seq_cst = 5 }; -COMPILER_CHECK(memory_order_relaxed == __ATOMIC_RELAXED); -COMPILER_CHECK(memory_order_consume == __ATOMIC_CONSUME); -COMPILER_CHECK(memory_order_acquire == __ATOMIC_ACQUIRE); -COMPILER_CHECK(memory_order_release == __ATOMIC_RELEASE); -COMPILER_CHECK(memory_order_acq_rel == __ATOMIC_ACQ_REL); -COMPILER_CHECK(memory_order_seq_cst == __ATOMIC_SEQ_CST); +static_assert(memory_order_relaxed == __ATOMIC_RELAXED, ""); +static_assert(memory_order_consume == __ATOMIC_CONSUME, ""); +static_assert(memory_order_acquire == __ATOMIC_ACQUIRE, ""); +static_assert(memory_order_release == __ATOMIC_RELEASE, ""); +static_assert(memory_order_acq_rel == __ATOMIC_ACQ_REL, ""); 
+static_assert(memory_order_seq_cst == __ATOMIC_SEQ_CST, ""); struct atomic_u8 { typedef u8 Type; @@ -60,7 +60,7 @@ struct atomic_uptr { }; template -INLINE typename T::Type atomic_load(const volatile T *A, memory_order MO) { +inline typename T::Type atomic_load(const volatile T *A, memory_order MO) { DCHECK(!(reinterpret_cast(A) % sizeof(*A))); typename T::Type V; __atomic_load(&A->ValDoNotUse, &V, MO); @@ -68,29 +68,29 @@ INLINE typename T::Type atomic_load(const volatile T *A, memory_order MO) { } template -INLINE void atomic_store(volatile T *A, typename T::Type V, memory_order MO) { +inline void atomic_store(volatile T *A, typename T::Type V, memory_order MO) { DCHECK(!(reinterpret_cast(A) % sizeof(*A))); __atomic_store(&A->ValDoNotUse, &V, MO); } -INLINE void atomic_thread_fence(memory_order) { __sync_synchronize(); } +inline void atomic_thread_fence(memory_order) { __sync_synchronize(); } template -INLINE typename T::Type atomic_fetch_add(volatile T *A, typename T::Type V, +inline typename T::Type atomic_fetch_add(volatile T *A, typename T::Type V, memory_order MO) { DCHECK(!(reinterpret_cast(A) % sizeof(*A))); return __atomic_fetch_add(&A->ValDoNotUse, V, MO); } template -INLINE typename T::Type atomic_fetch_sub(volatile T *A, typename T::Type V, +inline typename T::Type atomic_fetch_sub(volatile T *A, typename T::Type V, memory_order MO) { DCHECK(!(reinterpret_cast(A) % sizeof(*A))); return __atomic_fetch_sub(&A->ValDoNotUse, V, MO); } template -INLINE typename T::Type atomic_exchange(volatile T *A, typename T::Type V, +inline typename T::Type atomic_exchange(volatile T *A, typename T::Type V, memory_order MO) { DCHECK(!(reinterpret_cast(A) % sizeof(*A))); typename T::Type R; @@ -99,7 +99,7 @@ INLINE typename T::Type atomic_exchange(volatile T *A, typename T::Type V, } template -INLINE bool atomic_compare_exchange_strong(volatile T *A, typename T::Type *Cmp, +inline bool atomic_compare_exchange_strong(volatile T *A, typename T::Type *Cmp, typename T::Type 
Xchg, memory_order MO) { return __atomic_compare_exchange(&A->ValDoNotUse, Cmp, &Xchg, false, MO, @@ -107,7 +107,7 @@ INLINE bool atomic_compare_exchange_strong(volatile T *A, typename T::Type *Cmp, } template -INLINE bool atomic_compare_exchange_weak(volatile T *A, typename T::Type *Cmp, +inline bool atomic_compare_exchange_weak(volatile T *A, typename T::Type *Cmp, typename T::Type Xchg, memory_order MO) { return __atomic_compare_exchange(&A->ValDoNotUse, Cmp, &Xchg, true, MO, @@ -117,17 +117,17 @@ INLINE bool atomic_compare_exchange_weak(volatile T *A, typename T::Type *Cmp, // Clutter-reducing helpers. template -INLINE typename T::Type atomic_load_relaxed(const volatile T *A) { +inline typename T::Type atomic_load_relaxed(const volatile T *A) { return atomic_load(A, memory_order_relaxed); } template -INLINE void atomic_store_relaxed(volatile T *A, typename T::Type V) { +inline void atomic_store_relaxed(volatile T *A, typename T::Type V) { atomic_store(A, V, memory_order_relaxed); } template -INLINE typename T::Type atomic_compare_exchange(volatile T *A, +inline typename T::Type atomic_compare_exchange(volatile T *A, typename T::Type Cmp, typename T::Type Xchg) { atomic_compare_exchange_strong(A, &Cmp, Xchg, memory_order_acquire); diff --git a/compiler-rt/lib/scudo/standalone/checksum.h b/compiler-rt/lib/scudo/standalone/checksum.h index 092342fd6efbd..a63b1b4f064d1 100644 --- a/compiler-rt/lib/scudo/standalone/checksum.h +++ b/compiler-rt/lib/scudo/standalone/checksum.h @@ -37,7 +37,7 @@ enum class Checksum : u8 { // significantly on memory accesses, as well as 1K of CRC32 table, on platforms // that do no support hardware CRC32. The checksum itself is 16-bit, which is at // odds with CRC32, but enough for our needs. 
-INLINE u16 computeBSDChecksum(u16 Sum, uptr Data) { +inline u16 computeBSDChecksum(u16 Sum, uptr Data) { for (u8 I = 0; I < sizeof(Data); I++) { Sum = static_cast((Sum >> 1) | ((Sum & 1) << 15)); Sum = static_cast(Sum + (Data & 0xff)); diff --git a/compiler-rt/lib/scudo/standalone/chunk.h b/compiler-rt/lib/scudo/standalone/chunk.h index 9ae75823ba778..dff13db8a6c81 100644 --- a/compiler-rt/lib/scudo/standalone/chunk.h +++ b/compiler-rt/lib/scudo/standalone/chunk.h @@ -20,7 +20,7 @@ namespace scudo { extern Checksum HashAlgorithm; -INLINE u16 computeChecksum(u32 Seed, uptr Value, uptr *Array, uptr ArraySize) { +inline u16 computeChecksum(u32 Seed, uptr Value, uptr *Array, uptr ArraySize) { // If the hardware CRC32 feature is defined here, it was enabled everywhere, // as opposed to only for crc32_hw.cpp. This means that other hardware // specific instructions were likely emitted at other places, and as a result @@ -71,7 +71,7 @@ struct UnpackedHeader { uptr Checksum : 16; }; typedef atomic_u64 AtomicPackedHeader; -COMPILER_CHECK(sizeof(UnpackedHeader) == sizeof(PackedHeader)); +static_assert(sizeof(UnpackedHeader) == sizeof(PackedHeader), ""); // Those constants are required to silence some -Werror=conversion errors when // assigning values to the related bitfield variables. 
@@ -86,12 +86,12 @@ constexpr uptr getHeaderSize() { return roundUpTo(sizeof(PackedHeader), 1U << SCUDO_MIN_ALIGNMENT_LOG); } -INLINE AtomicPackedHeader *getAtomicHeader(void *Ptr) { +inline AtomicPackedHeader *getAtomicHeader(void *Ptr) { return reinterpret_cast(reinterpret_cast(Ptr) - getHeaderSize()); } -INLINE +inline const AtomicPackedHeader *getConstAtomicHeader(const void *Ptr) { return reinterpret_cast( reinterpret_cast(Ptr) - getHeaderSize()); @@ -100,7 +100,7 @@ const AtomicPackedHeader *getConstAtomicHeader(const void *Ptr) { // We do not need a cryptographically strong hash for the checksum, but a CRC // type function that can alert us in the event a header is invalid or // corrupted. Ideally slightly better than a simple xor of all fields. -static INLINE u16 computeHeaderChecksum(u32 Cookie, const void *Ptr, +static inline u16 computeHeaderChecksum(u32 Cookie, const void *Ptr, UnpackedHeader *Header) { UnpackedHeader ZeroChecksumHeader = *Header; ZeroChecksumHeader.Checksum = 0; @@ -110,7 +110,7 @@ static INLINE u16 computeHeaderChecksum(u32 Cookie, const void *Ptr, ARRAY_SIZE(HeaderHolder)); } -INLINE void storeHeader(u32 Cookie, void *Ptr, +inline void storeHeader(u32 Cookie, void *Ptr, UnpackedHeader *NewUnpackedHeader) { NewUnpackedHeader->Checksum = computeHeaderChecksum(Cookie, Ptr, NewUnpackedHeader); @@ -118,7 +118,7 @@ INLINE void storeHeader(u32 Cookie, void *Ptr, atomic_store_relaxed(getAtomicHeader(Ptr), NewPackedHeader); } -INLINE +inline void loadHeader(u32 Cookie, const void *Ptr, UnpackedHeader *NewUnpackedHeader) { PackedHeader NewPackedHeader = atomic_load_relaxed(getConstAtomicHeader(Ptr)); @@ -128,7 +128,7 @@ void loadHeader(u32 Cookie, const void *Ptr, reportHeaderCorruption(const_cast(Ptr)); } -INLINE void compareExchangeHeader(u32 Cookie, void *Ptr, +inline void compareExchangeHeader(u32 Cookie, void *Ptr, UnpackedHeader *NewUnpackedHeader, UnpackedHeader *OldUnpackedHeader) { NewUnpackedHeader->Checksum = @@ -141,7 +141,7 @@ 
INLINE void compareExchangeHeader(u32 Cookie, void *Ptr, reportHeaderRace(Ptr); } -INLINE +inline bool isValid(u32 Cookie, const void *Ptr, UnpackedHeader *NewUnpackedHeader) { PackedHeader NewPackedHeader = atomic_load_relaxed(getConstAtomicHeader(Ptr)); *NewUnpackedHeader = bit_cast(NewPackedHeader); diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h index 8560c2d3599f3..f33c9150148f9 100644 --- a/compiler-rt/lib/scudo/standalone/combined.h +++ b/compiler-rt/lib/scudo/standalone/combined.h @@ -184,7 +184,7 @@ template class Allocator { ((Alignment > MinAlignment) ? Alignment : Chunk::getHeaderSize()); // Takes care of extravagantly large sizes as well as integer overflows. - COMPILER_CHECK(MaxAllowedMallocSize < UINTPTR_MAX - MaxAlignment); + static_assert(MaxAllowedMallocSize < UINTPTR_MAX - MaxAlignment, ""); if (UNLIKELY(Size >= MaxAllowedMallocSize)) { if (Options.MayReturnNull) return nullptr; @@ -523,7 +523,7 @@ template class Allocator { reportSanityCheckError("class ID"); } - static INLINE void *getBlockBegin(const void *Ptr, + static inline void *getBlockBegin(const void *Ptr, Chunk::UnpackedHeader *Header) { return reinterpret_cast( reinterpret_cast(Ptr) - Chunk::getHeaderSize() - @@ -531,7 +531,7 @@ template class Allocator { } // Return the size of a chunk as requested during its allocation. 
- INLINE uptr getSize(const void *Ptr, Chunk::UnpackedHeader *Header) { + inline uptr getSize(const void *Ptr, Chunk::UnpackedHeader *Header) { const uptr SizeOrUnusedBytes = Header->SizeOrUnusedBytes; if (LIKELY(Header->ClassId)) return SizeOrUnusedBytes; diff --git a/compiler-rt/lib/scudo/standalone/common.h b/compiler-rt/lib/scudo/standalone/common.h index c015d1ca56696..a76eb6bbc1645 100644 --- a/compiler-rt/lib/scudo/standalone/common.h +++ b/compiler-rt/lib/scudo/standalone/common.h @@ -19,22 +19,22 @@ namespace scudo { -template INLINE Dest bit_cast(const Source &S) { - COMPILER_CHECK(sizeof(Dest) == sizeof(Source)); +template inline Dest bit_cast(const Source &S) { + static_assert(sizeof(Dest) == sizeof(Source), ""); Dest D; memcpy(&D, &S, sizeof(D)); return D; } -INLINE constexpr uptr roundUpTo(uptr X, uptr Boundary) { +inline constexpr uptr roundUpTo(uptr X, uptr Boundary) { return (X + Boundary - 1) & ~(Boundary - 1); } -INLINE constexpr uptr roundDownTo(uptr X, uptr Boundary) { +inline constexpr uptr roundDownTo(uptr X, uptr Boundary) { return X & ~(Boundary - 1); } -INLINE constexpr bool isAligned(uptr X, uptr Alignment) { +inline constexpr bool isAligned(uptr X, uptr Alignment) { return (X & (Alignment - 1)) == 0; } @@ -48,14 +48,14 @@ template void Swap(T &A, T &B) { B = Tmp; } -INLINE bool isPowerOfTwo(uptr X) { return (X & (X - 1)) == 0; } +inline bool isPowerOfTwo(uptr X) { return (X & (X - 1)) == 0; } -INLINE uptr getMostSignificantSetBitIndex(uptr X) { +inline uptr getMostSignificantSetBitIndex(uptr X) { DCHECK_NE(X, 0U); return SCUDO_WORDSIZE - 1U - static_cast(__builtin_clzl(X)); } -INLINE uptr roundUpToPowerOfTwo(uptr Size) { +inline uptr roundUpToPowerOfTwo(uptr Size) { DCHECK(Size); if (isPowerOfTwo(Size)) return Size; @@ -65,17 +65,17 @@ INLINE uptr roundUpToPowerOfTwo(uptr Size) { return 1UL << (Up + 1); } -INLINE uptr getLeastSignificantSetBitIndex(uptr X) { +inline uptr getLeastSignificantSetBitIndex(uptr X) { DCHECK_NE(X, 0U); return 
static_cast(__builtin_ctzl(X)); } -INLINE uptr getLog2(uptr X) { +inline uptr getLog2(uptr X) { DCHECK(isPowerOfTwo(X)); return getLeastSignificantSetBitIndex(X); } -INLINE u32 getRandomU32(u32 *State) { +inline u32 getRandomU32(u32 *State) { // ANSI C linear congruential PRNG (16-bit output). // return (*State = *State * 1103515245 + 12345) >> 16; // XorShift (32-bit output). @@ -85,11 +85,11 @@ INLINE u32 getRandomU32(u32 *State) { return *State; } -INLINE u32 getRandomModN(u32 *State, u32 N) { +inline u32 getRandomModN(u32 *State, u32 N) { return getRandomU32(State) % N; // [0, N) } -template INLINE void shuffle(T *A, u32 N, u32 *RandState) { +template inline void shuffle(T *A, u32 N, u32 *RandState) { if (N <= 1) return; u32 State = *RandState; @@ -100,7 +100,7 @@ template INLINE void shuffle(T *A, u32 N, u32 *RandState) { // Hardware specific inlinable functions. -INLINE void yieldProcessor(u8 Count) { +inline void yieldProcessor(u8 Count) { #if defined(__i386__) || defined(__x86_64__) __asm__ __volatile__("" ::: "memory"); for (u8 I = 0; I < Count; I++) @@ -117,7 +117,7 @@ INLINE void yieldProcessor(u8 Count) { extern uptr PageSizeCached; uptr getPageSizeSlow(); -INLINE uptr getPageSizeCached() { +inline uptr getPageSizeCached() { // Bionic uses a hardcoded value. 
if (SCUDO_ANDROID) return 4096U; diff --git a/compiler-rt/lib/scudo/standalone/flags_parser.cpp b/compiler-rt/lib/scudo/standalone/flags_parser.cpp index 070c08b019384..be39fcd4f8879 100644 --- a/compiler-rt/lib/scudo/standalone/flags_parser.cpp +++ b/compiler-rt/lib/scudo/standalone/flags_parser.cpp @@ -108,7 +108,7 @@ void FlagParser::parseString(const char *S) { Pos = OldPos; } -INLINE bool parseBool(const char *Value, bool *b) { +inline bool parseBool(const char *Value, bool *b) { if (strncmp(Value, "0", 1) == 0 || strncmp(Value, "no", 2) == 0 || strncmp(Value, "false", 5) == 0) { *b = false; diff --git a/compiler-rt/lib/scudo/standalone/fuchsia.cpp b/compiler-rt/lib/scudo/standalone/fuchsia.cpp index 0a9483ae1dd0d..b3d72de158cf9 100644 --- a/compiler-rt/lib/scudo/standalone/fuchsia.cpp +++ b/compiler-rt/lib/scudo/standalone/fuchsia.cpp @@ -29,7 +29,7 @@ void NORETURN die() { __builtin_trap(); } // We zero-initialize the Extra parameter of map(), make sure this is consistent // with ZX_HANDLE_INVALID. -COMPILER_CHECK(ZX_HANDLE_INVALID == 0); +static_assert(ZX_HANDLE_INVALID == 0, ""); static void *allocateVmar(uptr Size, MapPlatformData *Data, bool AllowNoMem) { // Only scenario so far. 
@@ -171,7 +171,7 @@ u64 getMonotonicTime() { return _zx_clock_get_monotonic(); } u32 getNumberOfCPUs() { return _zx_system_get_num_cpus(); } bool getRandom(void *Buffer, uptr Length, UNUSED bool Blocking) { - COMPILER_CHECK(MaxRandomLength <= ZX_CPRNG_DRAW_MAX_LEN); + static_assert(MaxRandomLength <= ZX_CPRNG_DRAW_MAX_LEN, ""); if (UNLIKELY(!Buffer || !Length || Length > MaxRandomLength)) return false; _zx_cprng_draw(Buffer, Length); diff --git a/compiler-rt/lib/scudo/standalone/internal_defs.h b/compiler-rt/lib/scudo/standalone/internal_defs.h index f80c0f621a462..8f6a89ecba737 100644 --- a/compiler-rt/lib/scudo/standalone/internal_defs.h +++ b/compiler-rt/lib/scudo/standalone/internal_defs.h @@ -30,7 +30,6 @@ #define INTERFACE __attribute__((visibility("default"))) #define WEAK __attribute__((weak)) -#define INLINE inline #define ALWAYS_INLINE inline __attribute__((always_inline)) #define ALIAS(X) __attribute__((alias(X))) // Please only use the ALIGNED macro before the type. Using ALIGNED after the @@ -126,8 +125,6 @@ void NORETURN reportCheckFailed(const char *File, int Line, die(); \ } while (0) -#define COMPILER_CHECK(Pred) static_assert(Pred, "") - } // namespace scudo #endif // SCUDO_INTERNAL_DEFS_H_ diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h index a0d8560c3f6c9..945324914d30f 100644 --- a/compiler-rt/lib/scudo/standalone/primary32.h +++ b/compiler-rt/lib/scudo/standalone/primary32.h @@ -42,7 +42,7 @@ template class SizeClassAllocator32 { public: typedef SizeClassMapT SizeClassMap; // Regions should be large enough to hold the largest Block. 
- COMPILER_CHECK((1UL << RegionSizeLog) >= SizeClassMap::MaxSize); + static_assert((1UL << RegionSizeLog) >= SizeClassMap::MaxSize, ""); typedef SizeClassAllocator32 ThisT; typedef SizeClassAllocatorLocalCache CacheT; typedef typename CacheT::TransferBatch TransferBatch; @@ -204,7 +204,7 @@ template class SizeClassAllocator32 { uptr AllocatedUser; ReleaseToOsInfo ReleaseInfo; }; - COMPILER_CHECK(sizeof(SizeClassInfo) % SCUDO_CACHE_LINE_SIZE == 0); + static_assert(sizeof(SizeClassInfo) % SCUDO_CACHE_LINE_SIZE == 0, ""); uptr computeRegionId(uptr Mem) { const uptr Id = Mem >> RegionSizeLog; diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h index 559742d05ad9e..b208ff69bb055 100644 --- a/compiler-rt/lib/scudo/standalone/primary64.h +++ b/compiler-rt/lib/scudo/standalone/primary64.h @@ -215,7 +215,7 @@ template class SizeClassAllocator64 { MapPlatformData Data; ReleaseToOsInfo ReleaseInfo; }; - COMPILER_CHECK(sizeof(RegionInfo) % SCUDO_CACHE_LINE_SIZE == 0); + static_assert(sizeof(RegionInfo) % SCUDO_CACHE_LINE_SIZE == 0, ""); uptr PrimaryBase; RegionInfo *RegionInfoArray; diff --git a/compiler-rt/lib/scudo/standalone/quarantine.h b/compiler-rt/lib/scudo/standalone/quarantine.h index 4b3f368ad9659..2bf7e804ef359 100644 --- a/compiler-rt/lib/scudo/standalone/quarantine.h +++ b/compiler-rt/lib/scudo/standalone/quarantine.h @@ -59,7 +59,7 @@ struct QuarantineBatch { void shuffle(u32 State) { ::scudo::shuffle(Batch, Count, &State); } }; -COMPILER_CHECK(sizeof(QuarantineBatch) <= (1U << 13)); // 8Kb. +static_assert(sizeof(QuarantineBatch) <= (1U << 13), ""); // 8Kb. // Per-thread cache of memory blocks. 
template class QuarantineCache { diff --git a/compiler-rt/lib/scudo/standalone/report.cpp b/compiler-rt/lib/scudo/standalone/report.cpp index 12d851ff019ad..80cc6eda2af92 100644 --- a/compiler-rt/lib/scudo/standalone/report.cpp +++ b/compiler-rt/lib/scudo/standalone/report.cpp @@ -34,7 +34,7 @@ class ScopedErrorReport { ScopedString Message; }; -INLINE void NORETURN trap() { __builtin_trap(); } +inline void NORETURN trap() { __builtin_trap(); } // This could potentially be called recursively if a CHECK fails in the reports. void NORETURN reportCheckFailed(const char *File, int Line, diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h index d44d2aeaf686a..ab68e5a1d38d7 100644 --- a/compiler-rt/lib/scudo/standalone/secondary.h +++ b/compiler-rt/lib/scudo/standalone/secondary.h @@ -52,7 +52,7 @@ template class MapAllocator { public: // Ensure the freelist is disabled on Fuchsia, since it doesn't support // releasing Secondary blocks yet. 
- COMPILER_CHECK(!SCUDO_FUCHSIA || MaxFreeListSize == 0U); + static_assert(!SCUDO_FUCHSIA || MaxFreeListSize == 0U, ""); void initLinkerInitialized(GlobalStats *S) { Stats.initLinkerInitialized(); diff --git a/compiler-rt/lib/scudo/standalone/size_class_map.h b/compiler-rt/lib/scudo/standalone/size_class_map.h index 59d6ede57ed27..947526e8aea17 100644 --- a/compiler-rt/lib/scudo/standalone/size_class_map.h +++ b/compiler-rt/lib/scudo/standalone/size_class_map.h @@ -49,7 +49,7 @@ class SizeClassMap { static const uptr MaxSize = 1UL << MaxSizeLog; static const uptr NumClasses = MidClass + ((MaxSizeLog - MidSizeLog) << S) + 1; - COMPILER_CHECK(NumClasses <= 256); + static_assert(NumClasses <= 256, ""); static const uptr LargestClassId = NumClasses - 1; static const uptr BatchClassId = 0; diff --git a/compiler-rt/lib/scudo/standalone/tsd.h b/compiler-rt/lib/scudo/standalone/tsd.h index f24ff01960fb2..626cc4b80fb7b 100644 --- a/compiler-rt/lib/scudo/standalone/tsd.h +++ b/compiler-rt/lib/scudo/standalone/tsd.h @@ -38,7 +38,7 @@ template struct ALIGNED(SCUDO_CACHE_LINE_SIZE) TSD { void commitBack(Allocator *Instance) { Instance->commitBack(this); } - INLINE bool tryLock() { + inline bool tryLock() { if (Mutex.tryLock()) { atomic_store_relaxed(&Precedence, 0); return true; @@ -49,12 +49,12 @@ template struct ALIGNED(SCUDO_CACHE_LINE_SIZE) TSD { static_cast(getMonotonicTime() >> FIRST_32_SECOND_64(16, 0))); return false; } - INLINE void lock() { + inline void lock() { atomic_store_relaxed(&Precedence, 0); Mutex.lock(); } - INLINE void unlock() { Mutex.unlock(); } - INLINE uptr getPrecedence() { return atomic_load_relaxed(&Precedence); } + inline void unlock() { Mutex.unlock(); } + inline uptr getPrecedence() { return atomic_load_relaxed(&Precedence); } private: HybridMutex Mutex; diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c_checks.h b/compiler-rt/lib/scudo/standalone/wrappers_c_checks.h index d4370d506e5ea..7fc1a9600e53b 100644 --- 
a/compiler-rt/lib/scudo/standalone/wrappers_c_checks.h +++ b/compiler-rt/lib/scudo/standalone/wrappers_c_checks.h @@ -20,7 +20,7 @@ namespace scudo { // A common errno setting logic shared by almost all Scudo C wrappers. -INLINE void *setErrnoOnNull(void *Ptr) { +inline void *setErrnoOnNull(void *Ptr) { if (UNLIKELY(!Ptr)) errno = ENOMEM; return Ptr; @@ -30,14 +30,14 @@ INLINE void *setErrnoOnNull(void *Ptr) { // Checks aligned_alloc() parameters, verifies that the alignment is a power of // two and that the size is a multiple of alignment. -INLINE bool checkAlignedAllocAlignmentAndSize(uptr Alignment, uptr Size) { +inline bool checkAlignedAllocAlignmentAndSize(uptr Alignment, uptr Size) { return Alignment == 0 || !isPowerOfTwo(Alignment) || !isAligned(Size, Alignment); } // Checks posix_memalign() parameters, verifies that alignment is a power of two // and a multiple of sizeof(void *). -INLINE bool checkPosixMemalignAlignment(uptr Alignment) { +inline bool checkPosixMemalignAlignment(uptr Alignment) { return Alignment == 0 || !isPowerOfTwo(Alignment) || !isAligned(Alignment, sizeof(void *)); } @@ -45,7 +45,7 @@ INLINE bool checkPosixMemalignAlignment(uptr Alignment) { // Returns true if calloc(Size, N) overflows on Size*N calculation. Use a // builtin supported by recent clang & GCC if it exists, otherwise fallback to a // costly division. -INLINE bool checkForCallocOverflow(uptr Size, uptr N, uptr *Product) { +inline bool checkForCallocOverflow(uptr Size, uptr N, uptr *Product) { #if __has_builtin(__builtin_umull_overflow) return __builtin_umull_overflow(Size, N, Product); #else @@ -58,7 +58,7 @@ INLINE bool checkForCallocOverflow(uptr Size, uptr N, uptr *Product) { // Returns true if the size passed to pvalloc overflows when rounded to the next // multiple of PageSize. 
-INLINE bool checkForPvallocOverflow(uptr Size, uptr PageSize) { +inline bool checkForPvallocOverflow(uptr Size, uptr PageSize) { return roundUpTo(Size, PageSize) < Size; } From b208088a2111aeb805d0984a2ff86b3ce14c725a Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Tue, 26 Nov 2019 18:18:14 -0800 Subject: [PATCH 126/591] scudo: Limit the number of bytes tested in a realloc test. This test was previously effectively doing: P = malloc(X); write X bytes to P; P = realloc(P, X - Y); P = realloc(P, X) and expecting that all X bytes stored to P would still be identical after the final realloc. This happens to be true for the current scudo implementation of realloc, but is not guaranteed to be true by the C standard ("Any bytes in the new object beyond the size of the old object have indeterminate values."). This implementation detail will change with the new memory tagging support, which unconditionally zeros newly allocated granules when memory tagging is enabled. Fix this by limiting the number of bytes that we test to the minimum size that we realloc the allocation to. 
Differential Revision: https://reviews.llvm.org/D70761 --- compiler-rt/lib/scudo/standalone/tests/combined_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp index 849fa713ad1d2..f38e9826863b9 100644 --- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp @@ -111,7 +111,7 @@ template static void testAllocator() { const scudo::uptr NewSize = DataSize + Delta; void *NewP = Allocator->reallocate(P, NewSize); EXPECT_EQ(NewP, P); - for (scudo::uptr I = 0; I < scudo::Min(DataSize, NewSize); I++) + for (scudo::uptr I = 0; I < DataSize - 32; I++) EXPECT_EQ((reinterpret_cast(NewP))[I], Marker); } Allocator->deallocate(P, Origin); From b19ec1eb3d0cbb3017e1bc7111efac5643cf4fdd Mon Sep 17 00:00:00 2001 From: Taewook Oh Date: Wed, 27 Nov 2019 10:18:01 -0800 Subject: [PATCH 127/591] [BPI] Improve unreachable/ColdCall heurstics to handle loops. Summary: While updatePostDominatedByUnreachable attemps to find basic blocks that are post-domianted by unreachable blocks, it currently cannot handle loops precisely, because it doesn't use the actual post dominator tree analysis but relies on heuristics of visiting basic blocks in post-order. More precisely, when the entire loop is post-dominated by the unreachable block, current algorithm fails to detect the entire loop as post-dominated by the unreachable because when the algorithm reaches to the loop latch it fails to tell all its successors (including the loop header) will "eventually" be post-domianted by the unreachable block, because the algorithm hasn't visited the loop header yet. This makes BPI for the loop latch to assume that loop backedges are taken with 100% of probability. 
And because of this, block frequency info sometimes marks virtually dead loops (which are post dominated by unreachable blocks) super hot, because 100% backedge-taken probability makes the loop iteration count the max value. updatePostDominatedByColdCall has the exact same problem as well. To address this problem, this patch makes PostDominatedByUnreachable/PostDominatedByColdCall to be computed with the actual post-dominator tree. Reviewers: skatkov, chandlerc, manmanren Reviewed By: skatkov Subscribers: manmanren, vsk, apilipenko, Carrot, qcolombet, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70104 --- .../llvm/Analysis/BranchProbabilityInfo.h | 7 +- llvm/lib/Analysis/BranchProbabilityInfo.cpp | 132 ++++++++------- .../Analysis/BranchProbabilityInfo/basic.ll | 18 ++ .../BranchProbabilityInfo/noreturn.ll | 26 +++ llvm/test/CodeGen/X86/block-placement.ll | 4 +- llvm/test/CodeGen/X86/pr37916.ll | 1 - llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll | 160 +++++++++--------- 7 files changed, 205 insertions(+), 143 deletions(-) diff --git a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h index c8965936fb9c1..41d6c23b8d0d9 100644 --- a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h @@ -34,6 +34,7 @@ namespace llvm { class Function; class LoopInfo; class raw_ostream; +class PostDominatorTree; class TargetLibraryInfo; class Value; @@ -187,8 +188,10 @@ class BranchProbabilityInfo { /// Track the set of blocks that always lead to a cold call. 
SmallPtrSet PostDominatedByColdCall; - void updatePostDominatedByUnreachable(const BasicBlock *BB); - void updatePostDominatedByColdCall(const BasicBlock *BB); + void computePostDominatedByUnreachable(const Function &F, + PostDominatorTree *PDT); + void computePostDominatedByColdCall(const Function &F, + PostDominatorTree *PDT); bool calcUnreachableHeuristics(const BasicBlock *BB); bool calcMetadataWeights(const BasicBlock *BB); bool calcColdCallHeuristics(const BasicBlock *BB); diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp index 7bd237b9ad537..ffba65b5ed5ee 100644 --- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -146,69 +147,83 @@ static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1; /// instruction. This is essentially never taken. static const uint32_t IH_NONTAKEN_WEIGHT = 1; -/// Add \p BB to PostDominatedByUnreachable set if applicable. -void -BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) { - const Instruction *TI = BB->getTerminator(); - if (TI->getNumSuccessors() == 0) { - if (isa(TI) || - // If this block is terminated by a call to - // @llvm.experimental.deoptimize then treat it like an unreachable since - // the @llvm.experimental.deoptimize call is expected to practically - // never execute. 
- BB->getTerminatingDeoptimizeCall()) - PostDominatedByUnreachable.insert(BB); - return; - } +static void UpdatePDTWorklist(const BasicBlock *BB, PostDominatorTree *PDT, + SmallVectorImpl &WorkList, + SmallPtrSetImpl &TargetSet) { + SmallVector Descendants; + SmallPtrSet NewItems; + + PDT->getDescendants(const_cast(BB), Descendants); + for (auto *BB : Descendants) + if (TargetSet.insert(BB).second) + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + if (!TargetSet.count(*PI)) + NewItems.insert(*PI); + WorkList.insert(WorkList.end(), NewItems.begin(), NewItems.end()); +} - // If the terminator is an InvokeInst, check only the normal destination block - // as the unwind edge of InvokeInst is also very unlikely taken. - if (auto *II = dyn_cast(TI)) { - if (PostDominatedByUnreachable.count(II->getNormalDest())) - PostDominatedByUnreachable.insert(BB); - return; +/// Compute a set of basic blocks that are post-dominated by unreachables. +void BranchProbabilityInfo::computePostDominatedByUnreachable( + const Function &F, PostDominatorTree *PDT) { + SmallVector WorkList; + for (auto &BB : F) { + const Instruction *TI = BB.getTerminator(); + if (TI->getNumSuccessors() == 0) { + if (isa(TI) || + // If this block is terminated by a call to + // @llvm.experimental.deoptimize then treat it like an unreachable + // since the @llvm.experimental.deoptimize call is expected to + // practically never execute. + BB.getTerminatingDeoptimizeCall()) + UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByUnreachable); + } } - for (auto *I : successors(BB)) - // If any of successor is not post dominated then BB is also not. 
- if (!PostDominatedByUnreachable.count(I)) - return; - - PostDominatedByUnreachable.insert(BB); + while (!WorkList.empty()) { + const BasicBlock *BB = WorkList.pop_back_val(); + if (PostDominatedByUnreachable.count(BB)) + continue; + // If the terminator is an InvokeInst, check only the normal destination + // block as the unwind edge of InvokeInst is also very unlikely taken. + if (auto *II = dyn_cast(BB->getTerminator())) { + if (PostDominatedByUnreachable.count(II->getNormalDest())) + UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable); + } + // If all the successors are unreachable, BB is unreachable as well. + else if (!successors(BB).empty() && + llvm::all_of(successors(BB), [this](const BasicBlock *Succ) { + return PostDominatedByUnreachable.count(Succ); + })) + UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable); + } } -/// Add \p BB to PostDominatedByColdCall set if applicable. -void -BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) { - assert(!PostDominatedByColdCall.count(BB)); - const Instruction *TI = BB->getTerminator(); - if (TI->getNumSuccessors() == 0) - return; +/// compute a set of basic blocks that are post-dominated by ColdCalls. +void BranchProbabilityInfo::computePostDominatedByColdCall( + const Function &F, PostDominatorTree *PDT) { + SmallVector WorkList; + for (auto &BB : F) + for (auto &I : BB) + if (const CallInst *CI = dyn_cast(&I)) + if (CI->hasFnAttr(Attribute::Cold)) + UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByColdCall); - // If all of successor are post dominated then BB is also done. 
- if (llvm::all_of(successors(BB), [&](const BasicBlock *SuccBB) { - return PostDominatedByColdCall.count(SuccBB); - })) { - PostDominatedByColdCall.insert(BB); - return; - } + while (!WorkList.empty()) { + const BasicBlock *BB = WorkList.pop_back_val(); - // If the terminator is an InvokeInst, check only the normal destination - // block as the unwind edge of InvokeInst is also very unlikely taken. - if (auto *II = dyn_cast(TI)) - if (PostDominatedByColdCall.count(II->getNormalDest())) { - PostDominatedByColdCall.insert(BB); - return; + // If the terminator is an InvokeInst, check only the normal destination + // block as the unwind edge of InvokeInst is also very unlikely taken. + if (auto *II = dyn_cast(BB->getTerminator())) { + if (PostDominatedByColdCall.count(II->getNormalDest())) + UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall); } - - // Otherwise, if the block itself contains a cold function, add it to the - // set of blocks post-dominated by a cold call. - for (auto &I : *BB) - if (const CallInst *CI = dyn_cast(&I)) - if (CI->hasFnAttr(Attribute::Cold)) { - PostDominatedByColdCall.insert(BB); - return; - } + // If all of successor are post dominated then BB is also done. + else if (!successors(BB).empty() && + llvm::all_of(successors(BB), [this](const BasicBlock *Succ) { + return PostDominatedByColdCall.count(Succ); + })) + UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall); + } } /// Calculate edge weights for successors lead to unreachable. @@ -983,13 +998,16 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, LLVM_DEBUG(dbgs() << "\n"); } + std::unique_ptr PDT = + std::make_unique(const_cast(F)); + computePostDominatedByUnreachable(F, PDT.get()); + computePostDominatedByColdCall(F, PDT.get()); + // Walk the basic blocks in post-order so that we can build up state about // the successors of a block iteratively. 
for (auto BB : post_order(&F.getEntryBlock())) { LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n"); - updatePostDominatedByUnreachable(BB); - updatePostDominatedByColdCall(BB); // If there is no at least two successors, no sense to set probability. if (BB->getTerminator()->getNumSuccessors() < 2) continue; diff --git a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll index 64e0a82456f11..8212cc4769045 100644 --- a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll @@ -141,6 +141,24 @@ exit: ret i32 %result } +define i32 @test_cold_loop(i32 %a, i32 %b) { +entry: + %cond1 = icmp eq i32 %a, 42 + br i1 %cond1, label %header, label %exit + +header: + br label %body + +body: + %cond2 = icmp eq i32 %b, 42 + br i1 %cond2, label %header, label %exit +; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00% + +exit: + call void @coldfunc() + ret i32 %b +} + declare i32 @regular_function(i32 %i) define i32 @test_cold_call_sites_with_prof(i32 %a, i32 %b, i1 %flag, i1 %flag2) { diff --git a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll index 0566ca16c2f3a..6e01afd2cfc82 100644 --- a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll @@ -79,6 +79,32 @@ exit: ret i32 %b } +define i32 @test4(i32 %a, i32 %b) { +; CHECK: Printing analysis {{.*}} for function 'test4' +; Make sure we handle loops post-dominated by unreachables. 
+entry: + %cond1 = icmp eq i32 %a, 42 + br i1 %cond1, label %header, label %exit +; CHECK: edge entry -> header probability is 0x00000001 / 0x80000000 = 0.00% +; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] + +header: + br label %body + +body: + %cond2 = icmp eq i32 %a, 42 + br i1 %cond2, label %header, label %abort +; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge body -> abort probability is 0x40000000 / 0x80000000 = 50.00% + +abort: + call void @abort() noreturn + unreachable + +exit: + ret i32 %b +} + @_ZTIi = external global i8* ; CHECK-LABEL: throwSmallException diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll index acc4b7e138118..258cc2031ae8b 100644 --- a/llvm/test/CodeGen/X86/block-placement.ll +++ b/llvm/test/CodeGen/X86/block-placement.ll @@ -358,11 +358,11 @@ define void @unnatural_cfg2(i32* %p0, i32 %a0) { ; CHECK: %loop.header ; CHECK: %loop.body1 ; CHECK: %loop.body2 +; CHECK: %loop.body3 +; CHECK: %loop.inner1.begin ; CHECK: %loop.body4 ; CHECK: %loop.inner2.begin ; CHECK: %loop.inner2.begin -; CHECK: %loop.body3 -; CHECK: %loop.inner1.begin ; CHECK: %bail entry: diff --git a/llvm/test/CodeGen/X86/pr37916.ll b/llvm/test/CodeGen/X86/pr37916.ll index 2da9413a9a0cf..484104da9ff47 100644 --- a/llvm/test/CodeGen/X86/pr37916.ll +++ b/llvm/test/CodeGen/X86/pr37916.ll @@ -7,7 +7,6 @@ define void @fn1() local_unnamed_addr { ; CHECK-LABEL: fn1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_1: # %if.end ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movl a+4, %eax diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll index 9238ab0bf89f7..92708d33924f0 100644 --- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll +++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll @@ -29,8 +29,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 
%fid) { ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 56 -; CHECK-NEXT: subq $536, %rsp ## imm = 0x218 -; CHECK-NEXT: .cfi_def_cfa_offset 592 +; CHECK-NEXT: subq $552, %rsp ## imm = 0x228 +; CHECK-NEXT: .cfi_def_cfa_offset 608 ; CHECK-NEXT: .cfi_offset %rbx, -56 ; CHECK-NEXT: .cfi_offset %r12, -48 ; CHECK-NEXT: .cfi_offset %r13, -40 @@ -54,7 +54,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je LBB0_55 ; CHECK-NEXT: LBB0_4: ## %cleanup -; CHECK-NEXT: addq $536, %rsp ## imm = 0x218 +; CHECK-NEXT: addq $552, %rsp ## imm = 0x228 ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 ; CHECK-NEXT: popq %r13 @@ -68,7 +68,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: je LBB0_55 ; CHECK-NEXT: ## %bb.6: ## %SyTime.exit2720 ; CHECK-NEXT: movq %rdx, %rbx -; CHECK-NEXT: movq %rdi, %rbp +; CHECK-NEXT: movq %rdi, %r14 ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: cmpq %rax, %rcx @@ -78,10 +78,10 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: movl $32, %esi ; CHECK-NEXT: callq _memset ; CHECK-NEXT: LBB0_8: ## %while.body.preheader -; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: imulq $1040, %rbx, %rax ## imm = 0x410 ; CHECK-NEXT: movq _syBuf@{{.*}}(%rip), %rcx -; CHECK-NEXT: leaq 8(%rcx,%rax), %rbx +; CHECK-NEXT: leaq 8(%rcx,%rax), %rax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: movl $1, %r15d ; CHECK-NEXT: movq _syCTRO@{{.*}}(%rip), %rax ; CHECK-NEXT: movb $1, %cl @@ -92,69 +92,70 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: jne LBB0_9 ; CHECK-NEXT: ## %bb.10: ## %do.end -; CHECK-NEXT: xorl %r14d, %r14d -; CHECK-NEXT: testb %r14b, %r14b +; CHECK-NEXT: xorl %ebp, %ebp +; CHECK-NEXT: testb %bpl, %bpl ; CHECK-NEXT: jne LBB0_11 ; CHECK-NEXT: ## %bb.12: 
## %while.body200.preheader -; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: leaq {{.*}}(%rip), %rsi -; CHECK-NEXT: leaq {{.*}}(%rip), %rdi -; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: xorl %r13d, %r13d +; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: leaq {{.*}}(%rip), %r13 +; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill +; CHECK-NEXT: xorl %r12d, %r12d +; CHECK-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: jmp LBB0_13 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_20: ## %sw.bb256 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl %r14d, %r13d +; CHECK-NEXT: movl %ebp, %r12d ; CHECK-NEXT: LBB0_21: ## %while.cond197.backedge ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: decl %r15d ; CHECK-NEXT: testl %r15d, %r15d -; CHECK-NEXT: movl %r13d, %r14d +; CHECK-NEXT: movl %r12d, %ebp ; CHECK-NEXT: jle LBB0_22 ; CHECK-NEXT: LBB0_13: ## %while.body200 ; CHECK-NEXT: ## =>This Loop Header: Depth=1 ; CHECK-NEXT: ## Child Loop BB0_30 Depth 2 ; CHECK-NEXT: ## Child Loop BB0_38 Depth 2 -; CHECK-NEXT: leal -268(%r14), %eax +; CHECK-NEXT: leal -268(%rbp), %eax ; CHECK-NEXT: cmpl $105, %eax ; CHECK-NEXT: ja LBB0_14 ; CHECK-NEXT: ## %bb.56: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movslq (%rdi,%rax,4), %rax -; CHECK-NEXT: addq %rdi, %rax +; CHECK-NEXT: movslq (%r13,%rax,4), %rax +; CHECK-NEXT: addq %r13, %rax ; CHECK-NEXT: jmpq *%rax ; CHECK-NEXT: LBB0_44: ## %while.cond1037.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: movl %r14d, %r13d +; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: movl %ebp, %r12d ; CHECK-NEXT: jne LBB0_21 ; CHECK-NEXT: jmp LBB0_55 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_14: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: leal 1(%r14), %eax +; CHECK-NEXT: leal 1(%rbp), %eax ; CHECK-NEXT: cmpl $21, %eax ; CHECK-NEXT: ja LBB0_20 ; CHECK-NEXT: ## 
%bb.15: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $-1, %r13d -; CHECK-NEXT: movslq (%rsi,%rax,4), %rax -; CHECK-NEXT: addq %rsi, %rax +; CHECK-NEXT: movl $-1, %r12d +; CHECK-NEXT: leaq {{.*}}(%rip), %rcx +; CHECK-NEXT: movslq (%rcx,%rax,4), %rax +; CHECK-NEXT: addq %rcx, %rax ; CHECK-NEXT: jmpq *%rax ; CHECK-NEXT: LBB0_18: ## %while.cond201.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $1, %r13d +; CHECK-NEXT: movl $1, %r12d ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_26: ## %sw.bb474 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: ## implicit-def: $r12 +; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: ## implicit-def: $r14 ; CHECK-NEXT: jne LBB0_34 ; CHECK-NEXT: ## %bb.27: ## %do.body479.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: ## implicit-def: $r12 +; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: ## implicit-def: $r14 ; CHECK-NEXT: jne LBB0_34 ; CHECK-NEXT: ## %bb.28: ## %land.rhs485.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 @@ -165,8 +166,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_32: ## %do.body479.backedge ; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 -; CHECK-NEXT: leaq 1(%r12), %rax -; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: leaq 1(%r14), %rax +; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: je LBB0_33 ; CHECK-NEXT: ## %bb.29: ## %land.rhs485 ; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 @@ -175,15 +176,14 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: LBB0_30: ## %cond.true.i.i2780 ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 -; CHECK-NEXT: movq %rax, %r12 -; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: movq %rax, %r14 +; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: jne LBB0_32 ; CHECK-NEXT: ## %bb.31: ## %lor.rhs500 ; CHECK-NEXT: ## 
in Loop: Header=BB0_30 Depth=2 ; CHECK-NEXT: movl $256, %esi ## imm = 0x100 ; CHECK-NEXT: callq ___maskrune -; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: jne LBB0_32 ; CHECK-NEXT: jmp LBB0_34 ; CHECK-NEXT: LBB0_45: ## %sw.bb1134 @@ -193,23 +193,23 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: cmpq %rax, %rcx ; CHECK-NEXT: jb LBB0_55 ; CHECK-NEXT: ## %bb.46: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: movl $268, %r13d ## imm = 0x10C +; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill +; CHECK-NEXT: movl $268, %r12d ## imm = 0x10C ; CHECK-NEXT: jmp LBB0_21 -; CHECK-NEXT: LBB0_19: ## %sw.bb243 +; CHECK-NEXT: LBB0_40: ## %sw.bb566 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $2, %r13d +; CHECK-NEXT: movl $20, %r12d ; CHECK-NEXT: jmp LBB0_21 -; CHECK-NEXT: LBB0_40: ## %sw.bb566 +; CHECK-NEXT: LBB0_19: ## %sw.bb243 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $20, %r13d +; CHECK-NEXT: movl $2, %r12d ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_33: ## %if.end517.loopexitsplit ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: incq %r12 +; CHECK-NEXT: incq %r14 ; CHECK-NEXT: LBB0_34: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: leal -324(%r13), %eax +; CHECK-NEXT: leal -324(%r12), %eax ; CHECK-NEXT: cmpl $59, %eax ; CHECK-NEXT: ja LBB0_35 ; CHECK-NEXT: ## %bb.57: ## %if.end517 @@ -219,11 +219,11 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: jb LBB0_38 ; CHECK-NEXT: LBB0_35: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: cmpl $11, %r13d +; CHECK-NEXT: cmpl $11, %r12d ; CHECK-NEXT: je LBB0_38 ; CHECK-NEXT: ## %bb.36: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: cmpl $24, %r13d +; CHECK-NEXT: cmpl $24, %r12d ; CHECK-NEXT: je LBB0_38 ; CHECK-NEXT: ## %bb.37: ## 
%if.then532 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 @@ -233,15 +233,14 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: LBB0_38: ## %for.cond534 ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 -; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: jne LBB0_38 ; CHECK-NEXT: ## %bb.39: ## %for.cond542.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: movb $0, (%r12) -; CHECK-NEXT: movl %r14d, %r13d -; CHECK-NEXT: leaq {{.*}}(%rip), %rsi -; CHECK-NEXT: leaq {{.*}}(%rip), %rdi +; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: movb $0, (%r14) +; CHECK-NEXT: movl %ebp, %r12d +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Reload ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_42: ## %while.cond864 @@ -256,30 +255,44 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: jmp LBB0_25 ; CHECK-NEXT: LBB0_11: -; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: xorl %r13d, %r13d +; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill +; CHECK-NEXT: xorl %r12d, %r12d ; CHECK-NEXT: LBB0_22: ## %while.end1465 -; CHECK-NEXT: incl %r13d -; CHECK-NEXT: cmpl $16, %r13d +; CHECK-NEXT: incl %r12d +; CHECK-NEXT: cmpl $16, %r12d ; CHECK-NEXT: ja LBB0_50 ; CHECK-NEXT: ## %bb.23: ## %while.end1465 ; CHECK-NEXT: movl $83969, %eax ## imm = 0x14801 -; CHECK-NEXT: btl %r13d, %eax +; CHECK-NEXT: btl %r12d, %eax ; CHECK-NEXT: jae LBB0_50 ; CHECK-NEXT: ## %bb.24: -; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload +; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: LBB0_48: ## %if.then1477 ; CHECK-NEXT: movl $1, %edx ; CHECK-NEXT: callq _write -; CHECK-NEXT: subq %rbp, %rbx +; CHECK-NEXT: subq %rbx, %r14 ; CHECK-NEXT: movq _syHistory@{{.*}}(%rip), %rax -; CHECK-NEXT: leaq 8189(%rbx,%rax), %rax 
+; CHECK-NEXT: leaq 8189(%r14,%rax), %rax ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_49: ## %for.body1723 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: decq %rax ; CHECK-NEXT: jmp LBB0_49 +; CHECK-NEXT: LBB0_47: ## %if.then1477.loopexit +; CHECK-NEXT: movq %r14, %rbx +; CHECK-NEXT: jmp LBB0_48 +; CHECK-NEXT: LBB0_16: ## %while.cond635.preheader +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: je LBB0_41 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_17: ## %for.body643.us +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jmp LBB0_17 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_41: ## %while.cond661 +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jmp LBB0_41 ; CHECK-NEXT: LBB0_50: ## %for.cond1480.preheader ; CHECK-NEXT: movl $512, %eax ## imm = 0x200 ; CHECK-NEXT: cmpq %rax, %rax @@ -289,14 +302,15 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne LBB0_54 ; CHECK-NEXT: ## %bb.52: ## %while.body1679.preheader -; CHECK-NEXT: incl %ebp -; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: incl {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill ; CHECK-NEXT: LBB0_53: ## %while.body1679 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movq (%rbx), %rdi +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload +; CHECK-NEXT: movq (%rax), %rdi ; CHECK-NEXT: callq _fileno -; CHECK-NEXT: movslq %ebp, %rax -; CHECK-NEXT: leal 1(%rax), %ebp +; CHECK-NEXT: movslq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 4-byte Folded Reload +; CHECK-NEXT: leal 1(%rax), %ecx +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill ; CHECK-NEXT: cmpq %rax, %rax ; CHECK-NEXT: jl LBB0_53 ; CHECK-NEXT: LBB0_54: ## %while.cond1683.preheader @@ -304,22 +318,6 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: LBB0_55: ## %if.then.i ; CHECK-NEXT: ud2 -; CHECK-NEXT: LBB0_47: ## 
%if.then1477.loopexit -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload -; CHECK-NEXT: movq %rbx, %rbp -; CHECK-NEXT: jmp LBB0_48 -; CHECK-NEXT: LBB0_16: ## %while.cond635.preheader -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: je LBB0_41 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_17: ## %for.body643.us -; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: jmp LBB0_17 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_41: ## %while.cond661 -; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: jmp LBB0_41 entry: %sub.ptr.rhs.cast646 = ptrtoint i8* %line to i64 %old = alloca [512 x i8], align 16 From ebfff46c8d29efd9767a24043766ddd588db26c3 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 26 Nov 2019 16:57:26 -0800 Subject: [PATCH 128/591] [LegalizeTypes][FPEnv][X86] Add initial support for softening strict fp nodes This is based on what's required for softening fp128 operations on 32-bit X86 assuming f32/f64/f80 are legal. So there could be some things missing. 
Differential Revision: https://reviews.llvm.org/D70654 --- .../SelectionDAG/LegalizeFloatTypes.cpp | 190 ++++- .../test/CodeGen/X86/fp128-libcalls-strict.ll | 788 +++++++++++++++++- 2 files changed, 932 insertions(+), 46 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 65ccb996299d1..f622f0a6306d8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -65,35 +65,60 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_VECTOR_ELT: R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break; case ISD::FABS: R = SoftenFloatRes_FABS(N); break; + case ISD::STRICT_FMINNUM: case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; + case ISD::STRICT_FMAXNUM: case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; + case ISD::STRICT_FADD: case ISD::FADD: R = SoftenFloatRes_FADD(N); break; case ISD::FCBRT: R = SoftenFloatRes_FCBRT(N); break; + case ISD::STRICT_FCEIL: case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; + case ISD::STRICT_FCOS: case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; + case ISD::STRICT_FDIV: case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; + case ISD::STRICT_FEXP: case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; + case ISD::STRICT_FEXP2: case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break; + case ISD::STRICT_FFLOOR: case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break; + case ISD::STRICT_FLOG: case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break; + case ISD::STRICT_FLOG2: case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break; + case ISD::STRICT_FLOG10: case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break; + case ISD::STRICT_FMA: case ISD::FMA: R = SoftenFloatRes_FMA(N); break; + case ISD::STRICT_FMUL: case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; + case ISD::STRICT_FNEARBYINT: case 
ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; + case ISD::STRICT_FP_EXTEND: case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break; + case ISD::STRICT_FPOW: case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break; + case ISD::STRICT_FPOWI: case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break; + case ISD::STRICT_FREM: case ISD::FREM: R = SoftenFloatRes_FREM(N); break; + case ISD::STRICT_FRINT: case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break; + case ISD::STRICT_FROUND: case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break; + case ISD::STRICT_FSIN: case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; + case ISD::STRICT_FSQRT: case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; + case ISD::STRICT_FSUB: case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; + case ISD::STRICT_FTRUNC: case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break; @@ -113,25 +138,43 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { } SDValue DAGTypeLegalizer::SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC) { + bool IsStrict = N->isStrictFPOpcode(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - assert(N->getNumOperands() == 1 && "Unexpected number of operands!"); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); + unsigned Offset = IsStrict ? 1 : 0; + assert(N->getNumOperands() == (1 + Offset) && + "Unexpected number of operands!"); + SDValue Op = GetSoftenedFloat(N->getOperand(0 + Offset)); + SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpVT = N->getOperand(0).getValueType(); + EVT OpVT = N->getOperand(0 + Offset).getValueType(); CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; + std::pair Tmp = TLI.makeLibCall(DAG, LC, NVT, Op, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; } SDValue DAGTypeLegalizer::SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC) { + bool IsStrict = N->isStrictFPOpcode(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - assert(N->getNumOperands() == 2 && "Unexpected number of operands!"); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; + unsigned Offset = IsStrict ? 1 : 0; + assert(N->getNumOperands() == (2 + Offset) && + "Unexpected number of operands!"); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0 + Offset)), + GetSoftenedFloat(N->getOperand(1 + Offset)) }; + SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; + EVT OpsVT[2] = { N->getOperand(0 + Offset).getValueType(), + N->getOperand(1 + Offset).getValueType() }; CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, LC, NVT, Ops, CallOptions, SDLoc(N)).first; + std::pair Tmp = TLI.makeLibCall(DAG, LC, NVT, Ops, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; } SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { @@ -368,22 +411,29 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { + bool IsStrict = N->isStrictFPOpcode(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)), - GetSoftenedFloat(N->getOperand(2)) }; + unsigned Offset = IsStrict ? 1 : 0; + SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0 + Offset)), + GetSoftenedFloat(N->getOperand(1 + Offset)), + GetSoftenedFloat(N->getOperand(2 + Offset)) }; + SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[3] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType(), - N->getOperand(2).getValueType() }; + EVT OpsVT[3] = { N->getOperand(0 + Offset).getValueType(), + N->getOperand(1 + Offset).getValueType(), + N->getOperand(2 + Offset).getValueType() }; CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::FMA_F32, - RTLIB::FMA_F64, - RTLIB::FMA_F80, - RTLIB::FMA_F128, - RTLIB::FMA_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + std::pair Tmp = TLI.makeLibCall(DAG, + GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_F128, + RTLIB::FMA_PPCF128), + NVT, Ops, CallOptions, SDLoc(N), Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { @@ -430,14 +480,24 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { + bool IsStrict = N->isStrictFPOpcode(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = N->getOperand(0); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's // entirely possible for both f16 and f32 to be legal, so use the fully // hard-float FP_EXTEND rather than FP16_TO_FP. 
if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) { - Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op); + if (IsStrict) { + Op = DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(N), + { MVT::f32, MVT::Other }, { Chain, Op }); + Chain = Op.getValue(1); + } else { + Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op); + } + if (getTypeAction(MVT::f32) == TargetLowering::TypeSoftenFloat) AddToWorklist(Op.getNode()); } @@ -456,7 +516,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { TargetLowering::MakeLibCallOptions CallOptions; EVT OpsVT[1] = { N->getOperand(0).getValueType() }; CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; + std::pair Tmp = TLI.makeLibCall(DAG, LC, NVT, Op, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; } // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special @@ -505,7 +570,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { - assert(N->getOperand(1).getValueType() == MVT::i32 && + bool IsStrict = N->isStrictFPOpcode(); + unsigned Offset = IsStrict ? 1 : 0; + assert(N->getOperand(1 + Offset).getValueType() == MVT::i32 && "Unsupported power type!"); RTLIB::Libcall LC = GetFPLibCall(N->getValueType(0), RTLIB::POWI_F32, @@ -521,12 +588,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { } EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) }; + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0 + Offset)), + N->getOperand(1 + Offset) }; + SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; + EVT OpsVT[2] = { N->getOperand(0 + Offset).getValueType(), + N->getOperand(1 + Offset).getValueType() }; CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, LC, NVT, Ops, CallOptions, SDLoc(N)).first; + std::pair Tmp = TLI.makeLibCall(DAG, LC, NVT, Ops, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { @@ -718,12 +792,17 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break; case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes + case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break; + case ISD::STRICT_LROUND: case ISD::LROUND: Res = SoftenFloatOp_LROUND(N); break; + case ISD::STRICT_LLROUND: case ISD::LLROUND: Res = SoftenFloatOp_LLROUND(N); break; + case ISD::STRICT_LRINT: case ISD::LRINT: Res = SoftenFloatOp_LRINT(N); break; + case ISD::STRICT_LLRINT: case ISD::LLRINT: Res = SoftenFloatOp_LLRINT(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; @@ -774,20 +853,31 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { // We actually deal with the partially-softened FP_TO_FP16 node too, which // returns an i16 so doesn't meet the constraints necessary for FP_ROUND. 
- assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16); + assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16 || + N->getOpcode() == ISD::STRICT_FP_ROUND); - EVT SVT = N->getOperand(0).getValueType(); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + EVT SVT = Op.getValueType(); EVT RVT = N->getValueType(0); EVT FloatRVT = N->getOpcode() == ISD::FP_TO_FP16 ? MVT::f16 : RVT; RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + Op = GetSoftenedFloat(Op); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first; + CallOptions.setTypeListBeforeSoften(SVT, RVT, true); + std::pair Tmp = TLI.makeLibCall(DAG, LC, RVT, Op, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) { + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Tmp.first); + return SDValue(); + } + return Tmp.first; } SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { @@ -947,15 +1037,27 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatOp_Unary(SDNode *N, RTLIB::Libcall LC) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); + bool IsStrict = N->isStrictFPOpcode(); + unsigned Offset = IsStrict ? 1 : 0; + SDValue Op = GetSoftenedFloat(N->getOperand(0 + Offset)); + SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpVT = N->getOperand(0).getValueType(); + EVT OpVT = N->getOperand(0 + Offset).getValueType(); CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; + std::pair Tmp = TLI.makeLibCall(DAG, LC, NVT, Op, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) { + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Tmp.first); + return SDValue(); + } + + return Tmp.first; } SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) { - EVT OpVT = N->getOperand(0).getValueType(); + EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType(); return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT, RTLIB::LROUND_F32, RTLIB::LROUND_F64, @@ -965,7 +1067,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) { - EVT OpVT = N->getOperand(0).getValueType(); + EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType(); return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT, RTLIB::LLROUND_F32, RTLIB::LLROUND_F64, @@ -975,7 +1077,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) { - EVT OpVT = N->getOperand(0).getValueType(); + EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType(); return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT, RTLIB::LRINT_F32, RTLIB::LRINT_F64, @@ -985,7 +1087,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) { - EVT OpVT = N->getOperand(0).getValueType(); + EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 
1 : 0).getValueType(); return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT, RTLIB::LLRINT_F32, RTLIB::LLRINT_F64, diff --git a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll index 9f5c32421dcc1..05b129ceeeaad 100644 --- a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll +++ b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll @@ -1,10 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx \ +; RUN: llc < %s -O2 -mtriple=x86_64-linux-android \ ; RUN: -enable-legalize-types-checking \ ; RUN: -disable-strictnode-mutation | FileCheck %s -; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx \ +; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu \ ; RUN: -enable-legalize-types-checking \ ; RUN: -disable-strictnode-mutation | FileCheck %s +; RUN: llc < %s -O2 -mtriple=i686-linux-gnu -mattr=+sse2 \ +; RUN: -enable-legalize-types-checking \ +; RUN: -disable-strictnode-mutation | FileCheck %s --check-prefix=X86 ; Check all soft floating point library function calls. 
@@ -15,6 +18,39 @@ define fp128 @add(fp128 %x, fp128 %y) nounwind strictfp { ; CHECK-NEXT: callq __addtf3 ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: add: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __addtf3 +; X86-NEXT: addl $44, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %add = call fp128 @llvm.experimental.constrained.fadd.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %add @@ -27,6 +63,39 @@ define fp128 @sub(fp128 %x, fp128 %y) nounwind strictfp { ; CHECK-NEXT: callq __subtf3 ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: sub: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; 
X86-NEXT: pushl %eax +; X86-NEXT: calll __subtf3 +; X86-NEXT: addl $44, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %sub = call fp128 @llvm.experimental.constrained.fsub.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %sub @@ -39,6 +108,39 @@ define fp128 @mul(fp128 %x, fp128 %y) nounwind strictfp { ; CHECK-NEXT: callq __multf3 ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: mul: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __multf3 +; X86-NEXT: addl $44, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %mul = call fp128 @llvm.experimental.constrained.fmul.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %mul @@ -51,6 +153,39 @@ define fp128 
@div(fp128 %x, fp128 %y) nounwind strictfp { ; CHECK-NEXT: callq __divtf3 ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: div: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __divtf3 +; X86-NEXT: addl $44, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %div = call fp128 @llvm.experimental.constrained.fdiv.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %div @@ -63,6 +198,43 @@ define fp128 @fma(fp128 %x, fp128 %y, fp128 %z) nounwind strictfp { ; CHECK-NEXT: callq fmal ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: fma: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) 
+; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll fmal +; X86-NEXT: addl $60, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %fma = call fp128 @llvm.experimental.constrained.fma.f128(fp128 %x, fp128 %y, fp128 %z, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %fma @@ -75,6 +247,39 @@ define fp128 @frem(fp128 %x, fp128 %y) nounwind strictfp { ; CHECK-NEXT: callq fmodl ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: frem: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll fmodl +; X86-NEXT: addl $44, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %div = call fp128 @llvm.experimental.constrained.frem.f128(fp128 %x, fp128 %y, 
metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %div @@ -87,6 +292,35 @@ define fp128 @ceil(fp128 %x) nounwind strictfp { ; CHECK-NEXT: callq ceill ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: ceil: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll ceill +; X86-NEXT: addl $28, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %ceil = call fp128 @llvm.experimental.constrained.ceil.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %ceil @@ -99,6 +333,35 @@ define fp128 @cos(fp128 %x) nounwind strictfp { ; CHECK-NEXT: callq cosl ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: cos: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll cosl +; X86-NEXT: addl $28, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 
8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %cos = call fp128 @llvm.experimental.constrained.cos.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %cos @@ -111,6 +374,35 @@ define fp128 @exp(fp128 %x) nounwind strictfp { ; CHECK-NEXT: callq expl ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: exp: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll expl +; X86-NEXT: addl $28, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %exp = call fp128 @llvm.experimental.constrained.exp.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %exp @@ -123,6 +415,35 @@ define fp128 @exp2(fp128 %x) nounwind strictfp { ; CHECK-NEXT: callq exp2l ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: exp2: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl 
{{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll exp2l +; X86-NEXT: addl $28, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %exp2 = call fp128 @llvm.experimental.constrained.exp2.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %exp2 @@ -135,6 +456,35 @@ define fp128 @floor(fp128 %x) nounwind strictfp { ; CHECK-NEXT: callq floorl ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: floor: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll floorl +; X86-NEXT: addl $28, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %floor = call fp128 @llvm.experimental.constrained.floor.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %floor @@ -147,6 +497,35 @@ define fp128 @log(fp128 %x) nounwind strictfp { ; CHECK-NEXT: callq logl ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; 
X86-LABEL: log: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll logl +; X86-NEXT: addl $28, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %log = call fp128 @llvm.experimental.constrained.log.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %log @@ -159,6 +538,35 @@ define fp128 @log10(fp128 %x) nounwind strictfp { ; CHECK-NEXT: callq log10l ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: log10: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll log10l +; X86-NEXT: addl $28, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: 
retl $4 entry: %log10 = call fp128 @llvm.experimental.constrained.log10.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %log10 @@ -171,6 +579,35 @@ define fp128 @log2(fp128 %x) nounwind strictfp { ; CHECK-NEXT: callq log2l ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: log2: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll log2l +; X86-NEXT: addl $28, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %log2 = call fp128 @llvm.experimental.constrained.log2.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %log2 @@ -183,6 +620,39 @@ define fp128 @maxnum(fp128 %x, fp128 %y) nounwind strictfp { ; CHECK-NEXT: callq fmaxl ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: maxnum: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; 
X86-NEXT: pushl %eax +; X86-NEXT: calll fmaxl +; X86-NEXT: addl $44, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %maxnum = call fp128 @llvm.experimental.constrained.maxnum.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %maxnum @@ -195,6 +665,39 @@ define fp128 @minnum(fp128 %x, fp128 %y) nounwind strictfp { ; CHECK-NEXT: callq fminl ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: minnum: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll fminl +; X86-NEXT: addl $44, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %minnum = call fp128 @llvm.experimental.constrained.minnum.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %minnum @@ -207,6 +710,35 @@ 
define fp128 @nearbyint(fp128 %x) nounwind strictfp { ; CHECK-NEXT: callq nearbyintl ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: nearbyint: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll nearbyintl +; X86-NEXT: addl $28, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %nearbyint = call fp128 @llvm.experimental.constrained.nearbyint.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %nearbyint @@ -219,6 +751,39 @@ define fp128 @pow(fp128 %x, fp128 %y) nounwind strictfp { ; CHECK-NEXT: callq powl ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: pow: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll powl +; X86-NEXT: addl $44, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: 
movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %pow = call fp128 @llvm.experimental.constrained.pow.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %pow @@ -231,6 +796,36 @@ define fp128 @powi(fp128 %x, i32 %y) nounwind strictfp { ; CHECK-NEXT: callq __powitf2 ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: powi: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $8, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __powitf2 +; X86-NEXT: addl $28, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %powi = call fp128 @llvm.experimental.constrained.powi.f128(fp128 %x, i32 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %powi @@ -243,6 +838,35 @@ define fp128 @rint(fp128 %x) nounwind strictfp { ; CHECK-NEXT: callq rintl ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: rint: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; 
X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll rintl +; X86-NEXT: addl $28, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %rint = call fp128 @llvm.experimental.constrained.rint.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %rint @@ -255,6 +879,35 @@ define fp128 @round(fp128 %x) nounwind strictfp { ; CHECK-NEXT: callq roundl ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: round: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll roundl +; X86-NEXT: addl $28, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %round = call fp128 @llvm.experimental.constrained.round.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 
%round @@ -267,6 +920,35 @@ define fp128 @sin(fp128 %x) nounwind strictfp { ; CHECK-NEXT: callq sinl ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: sin: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll sinl +; X86-NEXT: addl $28, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %sin = call fp128 @llvm.experimental.constrained.sin.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %sin @@ -279,6 +961,35 @@ define fp128 @sqrt(fp128 %x) nounwind strictfp { ; CHECK-NEXT: callq sqrtl ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: sqrt: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll sqrtl +; X86-NEXT: addl $28, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) 
+; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %sqrt = call fp128 @llvm.experimental.constrained.sqrt.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %sqrt @@ -291,6 +1002,35 @@ define fp128 @trunc(fp128 %x) nounwind strictfp { ; CHECK-NEXT: callq truncl ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq +; +; X86-LABEL: trunc: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll truncl +; X86-NEXT: addl $28, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %trunc = call fp128 @llvm.experimental.constrained.trunc.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret fp128 %trunc @@ -303,6 +1043,17 @@ define i32 @lrint(fp128 %x) nounwind strictfp { ; CHECK-NEXT: callq lrintl ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq +; +; X86-LABEL: lrint: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll lrintl +; X86-NEXT: addl $28, %esp +; X86-NEXT: retl entry: %rint = call i32 @llvm.experimental.constrained.lrint.i32.f128(fp128 %x, metadata 
!"round.dynamic", metadata !"fpexcept.strict") #0 ret i32 %rint @@ -315,6 +1066,17 @@ define i64 @llrint(fp128 %x) nounwind strictfp { ; CHECK-NEXT: callq llrintl ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq +; +; X86-LABEL: llrint: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll llrintl +; X86-NEXT: addl $28, %esp +; X86-NEXT: retl entry: %rint = call i64 @llvm.experimental.constrained.llrint.i64.f128(fp128 %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret i64 %rint @@ -327,6 +1089,17 @@ define i32 @lround(fp128 %x) nounwind strictfp { ; CHECK-NEXT: callq lroundl ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq +; +; X86-LABEL: lround: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll lroundl +; X86-NEXT: addl $28, %esp +; X86-NEXT: retl entry: %round = call i32 @llvm.experimental.constrained.lround.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i32 %round @@ -339,6 +1112,17 @@ define i64 @llround(fp128 %x) nounwind strictfp { ; CHECK-NEXT: callq llroundl ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq +; +; X86-LABEL: llround: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll llroundl +; X86-NEXT: addl $28, %esp +; X86-NEXT: retl entry: %round = call i64 @llvm.experimental.constrained.llround.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i64 %round From b98a0c7f6c9c7b38b6cd764e6f47e16e3d8c342c Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Wed, 27 Nov 2019 17:07:06 +0300 Subject: [PATCH 129/591] [clang][CodeGen] Implicit Conversion Sanitizer: 
handle increment/decrement (PR44054)(take 2) Summary: Implicit Conversion Sanitizer is *almost* feature complete. There aren't *that* much unsanitized things left, two major ones are increment/decrement (this patch) and bit fields. As it was discussed in [[ https://bugs.llvm.org/show_bug.cgi?id=39519 | PR39519 ]], unlike `CompoundAssignOperator` (which is promoted internally), or `BinaryOperator` (for which we always have promotion/demotion in AST) or parts of `UnaryOperator` (we have promotion/demotion but only for certain operations), for inc/dec, clang omits promotion/demotion altogether, under as-if rule. This is technically correct: https://rise4fun.com/Alive/zPgD As it can be seen in `InstCombineCasts.cpp` `canEvaluateTruncated()`, `add`/`sub`/`mul`/`and`/`or`/`xor` operators can all arbitrarily be extended or truncated: https://github.com/llvm/llvm-project/blob/901cd3b3f62d0c700e5d2c3f97eff97d634bec5e/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp#L1320-L1334 But that has serious implications: 1. Since we no longer model implicit casts, do we pessimise their AST representation and everything that uses it? 2. There is no demotion, so lossy demotion sanitizer does not trigger :] Now, i'm not going to argue about the first problem here, but the second one **needs** to be addressed. As it was stated in the report, this is done intentionally, so changing this in all modes would be considered a penalization/regression. Which means, the sanitization-less codegen must not be altered. It was also suggested to not change the sanitized codegen to the one with demotion, but i quite strongly believe that will not be the wise choice here: 1. One will need to re-engineer the check that the inc/dec was lossy in terms of `@llvm.{u,s}{add,sub}.with.overflow` builtins 2. We will still need to compute the result we would lossily demote. (i.e. the result of wide `add`ition/`sub`traction) 3. I suspect it would need to be done right here, in sanitization. 
Which kinda defeats the point of using `@llvm.{u,s}{add,sub}.with.overflow` builtins: we'd have two `add`s with basically the same arguments, one of which is used for check+error-less codepath and other one for the error reporting. That seems worse than a single wide op+check. 4. OR, we would need to do that in the compiler-rt handler. Which means we'll need a whole new handler. But then what about the `CompoundAssignOperator`, it would also be applicable for it. So this also doesn't really seem like the right path to me. 5. At least X86 (but likely others) pessimizes all sub-`i32` operations (due to partial register stalls), so even if we avoid promotion+demotion, the computations will //likely// be performed in `i32` anyways. So i'm not really seeing much benefit of not doing the straight-forward thing. While looking into this, i have noticed a few more LLVM middle-end missed canonicalizations, and filed [[ https://bugs.llvm.org/show_bug.cgi?id=44100 | PR44100 ]], [[ https://bugs.llvm.org/show_bug.cgi?id=44102 | PR44102 ]]. Those are not specific to inc/dec, we also have them for `CompoundAssignOperator`, and it can happen for normal arithmetics, too. But if we take some other path in the patch, it will not be applicable here, and we will have most likely played ourselves. TLDR: front-end should emit canonical, easy-to-optimize yet un-optimized code. It is middle-end's job to make it optimal. I'm really hoping reviewers agree with my personal assessment of the path this patch should take.. This originally landed in 9872ea4ed1de4c49300430e4f1f4dfc110a79ab9 but got immediately reverted in cbfa237892e55b7129a1178c9b03f26683d643af because the assertion was faulty. That fault ended up being caused by the enum - while there will be promotion, both types are unsigned, with same width. So we still don't need to sanitize non-signed cases. So far. Maybe the assert will tell us this isn't so. Fixes [[ https://bugs.llvm.org/show_bug.cgi?id=44054 | PR44054 ]]. Refs. 
https://github.com/google/sanitizers/issues/940 Reviewers: rjmccall, erichkeane, rsmith, vsk Reviewed By: erichkeane Subscribers: mehdi_amini, dexonsmith, cfe-commits, #sanitizers, llvm-commits, aaron.ballman, t.p.northover, efriedma, regehr Tags: #llvm, #clang, #sanitizers Differential Revision: https://reviews.llvm.org/D70539 --- clang/docs/ReleaseNotes.rst | 4 + clang/lib/CodeGen/CGExprScalar.cpp | 59 +++- ...ch-implicit-conversions-basics-negatives.c | 12 + ...catch-implicit-conversions-incdec-basics.c | 139 ++++++++ ...er-arithmetic-value-change-incdec-basics.c | 139 ++++++++ ...plicit-integer-conversions-incdec-basics.c | 139 ++++++++ ...licit-integer-sign-changes-incdec-basics.c | 139 ++++++++ ...tch-implicit-integer-sign-changes-incdec.c | 307 ++++++++++++++++++ ...plicit-integer-truncations-incdec-basics.c | 139 ++++++++ ...signed-integer-truncations-incdec-basics.c | 139 ++++++++ ...plicit-signed-integer-truncations-incdec.c | 303 +++++++++++++++++ ...signed-integer-truncations-incdec-basics.c | 101 ++++++ .../integer-conversion-incdec.c | 122 +++++++ .../integer-sign-change-incdec.c | 120 +++++++ .../signed-integer-truncation-incdec.c | 122 +++++++ 15 files changed, 1979 insertions(+), 5 deletions(-) create mode 100644 clang/test/CodeGen/catch-implicit-conversions-basics-negatives.c create mode 100644 clang/test/CodeGen/catch-implicit-conversions-incdec-basics.c create mode 100644 clang/test/CodeGen/catch-implicit-integer-arithmetic-value-change-incdec-basics.c create mode 100644 clang/test/CodeGen/catch-implicit-integer-conversions-incdec-basics.c create mode 100644 clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec-basics.c create mode 100644 clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec.c create mode 100644 clang/test/CodeGen/catch-implicit-integer-truncations-incdec-basics.c create mode 100644 clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec-basics.c create mode 100644 
clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec.c create mode 100644 clang/test/CodeGen/catch-implicit-unsigned-integer-truncations-incdec-basics.c create mode 100644 compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-conversion-incdec.c create mode 100644 compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-sign-change-incdec.c create mode 100644 compiler-rt/test/ubsan/TestCases/ImplicitConversion/signed-integer-truncation-incdec.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 4ac300deb589a..37a8f30e0bc9c 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -78,6 +78,10 @@ Non-comprehensive list of changes in this release been extended to detect these cases, so that code relying on them can be detected and fixed. +* The Implicit Conversion Sanitizer (``-fsanitize=implicit-conversion``) has + learned to sanitize pre/post increment/decrement of types with bit width + smaller than ``int``. + - For X86 target, -march=skylake-avx512, -march=icelake-client, -march=icelake-server, -march=cascadelake, -march=cooperlake will default to not using 512-bit zmm registers in vectorized code unless 512-bit intrinsics diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 8229766406434..d727e326a27a5 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -976,6 +976,11 @@ EmitIntegerTruncationCheckHelper(Value *Src, QualType SrcType, Value *Dst, return std::make_pair(Kind, std::make_pair(Check, Mask)); } +static bool PromotionIsPotentiallyEligibleForImplicitIntegerConversionCheck( + QualType SrcType, QualType DstType) { + return SrcType->isIntegerType() && DstType->isIntegerType(); +} + void ScalarExprEmitter::EmitIntegerTruncationCheck(Value *Src, QualType SrcType, Value *Dst, QualType DstType, SourceLocation Loc) { @@ -984,7 +989,8 @@ void ScalarExprEmitter::EmitIntegerTruncationCheck(Value *Src, QualType SrcType, 
// We only care about int->int conversions here. // We ignore conversions to/from pointer and/or bool. - if (!(SrcType->isIntegerType() && DstType->isIntegerType())) + if (!PromotionIsPotentiallyEligibleForImplicitIntegerConversionCheck(SrcType, + DstType)) return; unsigned SrcBits = Src->getType()->getScalarSizeInBits(); @@ -1095,7 +1101,8 @@ void ScalarExprEmitter::EmitIntegerSignChangeCheck(Value *Src, QualType SrcType, // We only care about int->int conversions here. // We ignore conversions to/from pointer and/or bool. - if (!(SrcType->isIntegerType() && DstType->isIntegerType())) + if (!PromotionIsPotentiallyEligibleForImplicitIntegerConversionCheck(SrcType, + DstType)) return; bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType(); @@ -2419,9 +2426,51 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, // Most common case by far: integer increment. } else if (type->isIntegerType()) { - // Note that signed integer inc/dec with width less than int can't - // overflow because of promotion rules; we're just eliding a few steps here. 
- if (E->canOverflow() && type->isSignedIntegerOrEnumerationType()) { + QualType promotedType; + bool canPerformLossyDemotionCheck = false; + if (type->isPromotableIntegerType()) { + promotedType = CGF.getContext().getPromotedIntegerType(type); + assert(promotedType != type && "Shouldn't promote to the same type."); + canPerformLossyDemotionCheck = true; + canPerformLossyDemotionCheck &= + CGF.getContext().getCanonicalType(type) != + CGF.getContext().getCanonicalType(promotedType); + canPerformLossyDemotionCheck &= + PromotionIsPotentiallyEligibleForImplicitIntegerConversionCheck( + type, promotedType); + assert((!canPerformLossyDemotionCheck || + type->isSignedIntegerOrEnumerationType() || + promotedType->isSignedIntegerOrEnumerationType() || + ConvertType(type)->getScalarSizeInBits() == + ConvertType(promotedType)->getScalarSizeInBits()) && + "The following check expects that if we do promotion to different " + "underlying canonical type, at least one of the types (either " + "base or promoted) will be signed, or the bitwidths will match."); + } + if (CGF.SanOpts.hasOneOf( + SanitizerKind::ImplicitIntegerArithmeticValueChange) && + canPerformLossyDemotionCheck) { + // While `x += 1` (for `x` with width less than int) is modeled as + // promotion+arithmetics+demotion, and we can catch lossy demotion with + // ease; inc/dec with width less than int can't overflow because of + // promotion rules, so we omit promotion+demotion, which means that we can + // not catch lossy "demotion". Because we still want to catch these cases + // when the sanitizer is enabled, we perform the promotion, then perform + // the increment/decrement in the wider type, and finally + // perform the demotion. This will catch lossy demotions. + + value = EmitScalarConversion(value, type, promotedType, E->getExprLoc()); + Value *amt = llvm::ConstantInt::get(value->getType(), amount, true); + value = Builder.CreateAdd(value, amt, isInc ? 
"inc" : "dec"); + // Do pass non-default ScalarConversionOpts so that sanitizer check is + // emitted. + value = EmitScalarConversion(value, promotedType, type, E->getExprLoc(), + ScalarConversionOpts(CGF.SanOpts)); + + // Note that signed integer inc/dec with width less than int can't + // overflow because of promotion rules; we're just eliding a few steps + // here. + } else if (E->canOverflow() && type->isSignedIntegerOrEnumerationType()) { value = EmitIncDecConsiderOverflowBehavior(E, value, isInc); } else if (E->canOverflow() && type->isUnsignedIntegerType() && CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) { diff --git a/clang/test/CodeGen/catch-implicit-conversions-basics-negatives.c b/clang/test/CodeGen/catch-implicit-conversions-basics-negatives.c new file mode 100644 index 0000000000000..2e060cfcddef3 --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-conversions-basics-negatives.c @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -fsanitize=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change -fsanitize-recover=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK + +// If we have an enum, it will be promoted to an unsigned integer. +// But both types are unsigned, and have same bitwidth. +// So we should not emit any sanitization. Also, for inc/dec we currently assume +// (assert) that we will only have cases where at least one of the types +// is signed, which isn't the case here. 
+typedef enum { a } b; +b t0(b c) { + c--; + return c; +} diff --git a/clang/test/CodeGen/catch-implicit-conversions-incdec-basics.c b/clang/test/CodeGen/catch-implicit-conversions-incdec-basics.c new file mode 100644 index 0000000000000..e97a72cb0a339 --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-conversions-incdec-basics.c @@ -0,0 +1,139 @@ +// RUN: %clang_cc1 -fsanitize=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change -fsanitize-recover=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK + +// CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } +// CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } +// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } +// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } +// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// 
CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } +// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } + +// CHECK-LABEL: @t0( +unsigned short t0(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*) +#line 100 + x++; + return x; +} +// CHECK-LABEL: @t1( +unsigned short t1(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*) +#line 200 + x--; + return x; +} +// CHECK-LABEL: @t2( +unsigned short t2(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*) +#line 300 + ++x; + return x; +} +// CHECK-LABEL: @t3( +unsigned short t3(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*) +#line 400 + --x; + return x; +} + +// CHECK-LABEL: @t4( +signed short t4(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*) +#line 500 + x++; + return x; +} +// 
CHECK-LABEL: @t5( +signed short t5(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*) +#line 600 + x--; + return x; +} +// CHECK-LABEL: @t6( +signed short t6(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*) +#line 700 + ++x; + return x; +} +// CHECK-LABEL: @t7( +signed short t7(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*) +#line 800 + --x; + return x; +} + +// CHECK-LABEL: @t8( +unsigned char t8(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_900]] to i8*) +#line 900 + x++; + return x; +} +// CHECK-LABEL: @t9( +unsigned char t9(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1000]] to i8*) +#line 1000 + x--; + return x; +} +// CHECK-LABEL: @t10( +unsigned char t10(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1100]] to i8*) +#line 1100 + ++x; + return x; +} +// CHECK-LABEL: @t11( +unsigned char t11(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1200]] to i8*) +#line 1200 + --x; + return x; +} + +// CHECK-LABEL: @t12( +signed char t12(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1300]] to i8*) +#line 1300 + x++; + return x; +} +// CHECK-LABEL: @t13( +signed char t13(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1400]] to i8*) 
+#line 1400 + x--; + return x; +} +// CHECK-LABEL: @t14( +signed char t14(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1500]] to i8*) +#line 1500 + ++x; + return x; +} +// CHECK-LABEL: @t15( +signed char t15(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1600]] to i8*) +#line 1600 + --x; + return x; +} diff --git a/clang/test/CodeGen/catch-implicit-integer-arithmetic-value-change-incdec-basics.c b/clang/test/CodeGen/catch-implicit-integer-arithmetic-value-change-incdec-basics.c new file mode 100644 index 0000000000000..5e0aa1108dfc9 --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-integer-arithmetic-value-change-incdec-basics.c @@ -0,0 +1,139 @@ +// RUN: %clang_cc1 -fsanitize=implicit-signed-integer-truncation,implicit-integer-sign-change -fsanitize-recover=implicit-signed-integer-truncation,implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK + +// CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } +// CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } +// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } +// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, {{.*}}* @[[INT]], {{.*}}* 
@[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } +// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } +// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } + +// CHECK-LABEL: @t0( +unsigned short t0(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*) +#line 100 + x++; + return x; +} +// CHECK-LABEL: @t1( +unsigned short t1(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*) +#line 200 + x--; + return x; +} +// CHECK-LABEL: @t2( +unsigned short t2(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*) +#line 300 + ++x; + return x; +} +// CHECK-LABEL: @t3( +unsigned short 
t3(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*) +#line 400 + --x; + return x; +} + +// CHECK-LABEL: @t4( +signed short t4(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*) +#line 500 + x++; + return x; +} +// CHECK-LABEL: @t5( +signed short t5(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*) +#line 600 + x--; + return x; +} +// CHECK-LABEL: @t6( +signed short t6(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*) +#line 700 + ++x; + return x; +} +// CHECK-LABEL: @t7( +signed short t7(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*) +#line 800 + --x; + return x; +} + +// CHECK-LABEL: @t8( +unsigned char t8(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_900]] to i8*) +#line 900 + x++; + return x; +} +// CHECK-LABEL: @t9( +unsigned char t9(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1000]] to i8*) +#line 1000 + x--; + return x; +} +// CHECK-LABEL: @t10( +unsigned char t10(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1100]] to i8*) +#line 1100 + ++x; + return x; +} +// CHECK-LABEL: @t11( +unsigned char t11(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1200]] to i8*) +#line 1200 + --x; + return x; +} + 
+// CHECK-LABEL: @t12( +signed char t12(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1300]] to i8*) +#line 1300 + x++; + return x; +} +// CHECK-LABEL: @t13( +signed char t13(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1400]] to i8*) +#line 1400 + x--; + return x; +} +// CHECK-LABEL: @t14( +signed char t14(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1500]] to i8*) +#line 1500 + ++x; + return x; +} +// CHECK-LABEL: @t15( +signed char t15(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1600]] to i8*) +#line 1600 + --x; + return x; +} diff --git a/clang/test/CodeGen/catch-implicit-integer-conversions-incdec-basics.c b/clang/test/CodeGen/catch-implicit-integer-conversions-incdec-basics.c new file mode 100644 index 0000000000000..e97a72cb0a339 --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-integer-conversions-incdec-basics.c @@ -0,0 +1,139 @@ +// RUN: %clang_cc1 -fsanitize=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change -fsanitize-recover=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK + +// CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } +// CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } +// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: 
@[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } +// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } +// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } +// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } + +// CHECK-LABEL: @t0( +unsigned short t0(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*) +#line 100 + x++; + return x; +} +// CHECK-LABEL: @t1( +unsigned short 
t1(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*) +#line 200 + x--; + return x; +} +// CHECK-LABEL: @t2( +unsigned short t2(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*) +#line 300 + ++x; + return x; +} +// CHECK-LABEL: @t3( +unsigned short t3(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*) +#line 400 + --x; + return x; +} + +// CHECK-LABEL: @t4( +signed short t4(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*) +#line 500 + x++; + return x; +} +// CHECK-LABEL: @t5( +signed short t5(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*) +#line 600 + x--; + return x; +} +// CHECK-LABEL: @t6( +signed short t6(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*) +#line 700 + ++x; + return x; +} +// CHECK-LABEL: @t7( +signed short t7(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*) +#line 800 + --x; + return x; +} + +// CHECK-LABEL: @t8( +unsigned char t8(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_900]] to i8*) +#line 900 + x++; + return x; +} +// CHECK-LABEL: @t9( +unsigned char t9(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1000]] to i8*) +#line 1000 + x--; + return x; +} +// 
CHECK-LABEL: @t10( +unsigned char t10(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1100]] to i8*) +#line 1100 + ++x; + return x; +} +// CHECK-LABEL: @t11( +unsigned char t11(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1200]] to i8*) +#line 1200 + --x; + return x; +} + +// CHECK-LABEL: @t12( +signed char t12(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1300]] to i8*) +#line 1300 + x++; + return x; +} +// CHECK-LABEL: @t13( +signed char t13(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1400]] to i8*) +#line 1400 + x--; + return x; +} +// CHECK-LABEL: @t14( +signed char t14(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1500]] to i8*) +#line 1500 + ++x; + return x; +} +// CHECK-LABEL: @t15( +signed char t15(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1600]] to i8*) +#line 1600 + --x; + return x; +} diff --git a/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec-basics.c b/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec-basics.c new file mode 100644 index 0000000000000..93495b331b9f8 --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec-basics.c @@ -0,0 +1,139 @@ +// RUN: %clang_cc1 -fsanitize=implicit-integer-sign-change -fsanitize-recover=implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK + +// CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } +// 
CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } +// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } +// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } +// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } +// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } +// CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } +// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } +// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } +// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } +// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } +// CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } +// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 3 } +// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 3 } +// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 3 } +// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 3 } +// CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } +// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 3 } +// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 3 } +// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 3 } +// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, 
{{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 3 } + +// CHECK-LABEL: @t0( +unsigned short t0(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*) +#line 100 + x++; + return x; +} +// CHECK-LABEL: @t1( +unsigned short t1(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*) +#line 200 + x--; + return x; +} +// CHECK-LABEL: @t2( +unsigned short t2(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*) +#line 300 + ++x; + return x; +} +// CHECK-LABEL: @t3( +unsigned short t3(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*) +#line 400 + --x; + return x; +} + +// CHECK-LABEL: @t4( +signed short t4(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*) +#line 500 + x++; + return x; +} +// CHECK-LABEL: @t5( +signed short t5(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*) +#line 600 + x--; + return x; +} +// CHECK-LABEL: @t6( +signed short t6(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*) +#line 700 + ++x; + return x; +} +// CHECK-LABEL: @t7( +signed short t7(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*) +#line 800 + --x; + return x; +} + +// CHECK-LABEL: @t8( +unsigned char t8(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, 
{{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_900]] to i8*) +#line 900 + x++; + return x; +} +// CHECK-LABEL: @t9( +unsigned char t9(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1000]] to i8*) +#line 1000 + x--; + return x; +} +// CHECK-LABEL: @t10( +unsigned char t10(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1100]] to i8*) +#line 1100 + ++x; + return x; +} +// CHECK-LABEL: @t11( +unsigned char t11(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1200]] to i8*) +#line 1200 + --x; + return x; +} + +// CHECK-LABEL: @t12( +signed char t12(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1300]] to i8*) +#line 1300 + x++; + return x; +} +// CHECK-LABEL: @t13( +signed char t13(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1400]] to i8*) +#line 1400 + x--; + return x; +} +// CHECK-LABEL: @t14( +signed char t14(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1500]] to i8*) +#line 1500 + ++x; + return x; +} +// CHECK-LABEL: @t15( +signed char t15(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1600]] to i8*) +#line 1600 + --x; + return x; +} diff --git a/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec.c b/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec.c new file mode 100644 index 0000000000000..41e08ee32a525 --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec.c @@ -0,0 +1,307 @@ +// RUN: %clang_cc1 
-emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NOSANITIZE + +// RUN: %clang_cc1 -fsanitize=implicit-integer-sign-change -fno-sanitize-recover=implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-NORECOVER,CHECK-SANITIZE-UNREACHABLE +// RUN: %clang_cc1 -fsanitize=implicit-integer-sign-change -fsanitize-recover=implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-RECOVER +// RUN: %clang_cc1 -fsanitize=implicit-integer-sign-change -fsanitize-trap=implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-TRAP,CHECK-SANITIZE-UNREACHABLE + +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } 
+// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 3 } + +unsigned short t0(unsigned short x) { +// CHECK-NOSANITIZE-LABEL: @t0( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] +// +// CHECK-SANITIZE-LABEL: @t0( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], false, !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// 
CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] +#line 100 + return x++; +} +unsigned short t1(unsigned short x) { +// CHECK-NOSANITIZE-LABEL: @t1( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] +// +// CHECK-SANITIZE-LABEL: @t1( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq 
i1 [[SRC_INC_NEGATIVITYCHECK]], false, !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] +#line 200 + return x--; +} + +unsigned short t2(unsigned short x) { +// CHECK-NOSANITIZE-LABEL: @t2( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] +// +// CHECK-SANITIZE-LABEL: @t2( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load 
i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], false, !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] +#line 300 + return ++x; +} + +unsigned short t3(unsigned short x) { +// CHECK-NOSANITIZE-LABEL: @t3( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// 
CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] +// +// CHECK-SANITIZE-LABEL: @t3( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], false, !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 
2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] +#line 400 + return --x; +} + +signed short t4(signed short x) { +// CHECK-NOSANITIZE-LABEL: @t4( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] +// +// CHECK-SANITIZE-LABEL: @t4( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[DST_NEGATIVITYCHECK:%.*]] = icmp slt i16 [[X_PROMOTED_DEMOTED]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], [[DST_NEGATIVITYCHECK]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast 
({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] +#line 500 + return x++; +} +signed short t5(signed short x) { +// CHECK-NOSANITIZE-LABEL: @t5( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] +// +// CHECK-SANITIZE-LABEL: @t5( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[DST_NEGATIVITYCHECK:%.*]] = icmp slt i16 [[X_PROMOTED_DEMOTED]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], [[DST_NEGATIVITYCHECK]], !nosanitize +// CHECK-SANITIZE-NEXT: 
br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] +#line 600 + return x--; +} + +signed short t6(signed short x) { +// CHECK-NOSANITIZE-LABEL: @t6( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] +// +// CHECK-SANITIZE-LABEL: @t6( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext 
i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[DST_NEGATIVITYCHECK:%.*]] = icmp slt i16 [[X_PROMOTED_DEMOTED]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], [[DST_NEGATIVITYCHECK]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] +#line 700 + return ++x; +} + +signed short t7(signed short x) { +// CHECK-NOSANITIZE-LABEL: @t7( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load 
i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] +// +// CHECK-SANITIZE-LABEL: @t7( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[SRC_INC_NEGATIVITYCHECK:%.*]] = icmp slt i32 [[INC]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[DST_NEGATIVITYCHECK:%.*]] = icmp slt i16 [[X_PROMOTED_DEMOTED]], 0, !nosanitize !2 +// CHECK-SANITIZE-NEXT: [[SIGNCHANGECHECK:%.*]] = icmp eq i1 [[SRC_INC_NEGATIVITYCHECK]], [[DST_NEGATIVITYCHECK]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[SIGNCHANGECHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// 
CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] +#line 800 + return --x; +} diff --git a/clang/test/CodeGen/catch-implicit-integer-truncations-incdec-basics.c b/clang/test/CodeGen/catch-implicit-integer-truncations-incdec-basics.c new file mode 100644 index 0000000000000..6ac2be6d9fd0c --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-integer-truncations-incdec-basics.c @@ -0,0 +1,139 @@ +// RUN: %clang_cc1 -fsanitize=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation -fsanitize-recover=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK + +// CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } +// CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } +// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } +// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: 
@[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } +// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } +// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } + +// CHECK-LABEL: @t0( +unsigned short t0(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*) +#line 100 + x++; + return x; +} +// CHECK-LABEL: @t1( +unsigned short t1(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*) +#line 200 + x--; + return x; +} +// CHECK-LABEL: @t2( +unsigned short t2(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*) +#line 300 + ++x; + return x; +} +// CHECK-LABEL: @t3( +unsigned short t3(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*) +#line 400 + --x; + return x; +} + +// CHECK-LABEL: @t4( +signed short t4(signed short x) { 
+ // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*) +#line 500 + x++; + return x; +} +// CHECK-LABEL: @t5( +signed short t5(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*) +#line 600 + x--; + return x; +} +// CHECK-LABEL: @t6( +signed short t6(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*) +#line 700 + ++x; + return x; +} +// CHECK-LABEL: @t7( +signed short t7(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*) +#line 800 + --x; + return x; +} + +// CHECK-LABEL: @t8( +unsigned char t8(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_900]] to i8*) +#line 900 + x++; + return x; +} +// CHECK-LABEL: @t9( +unsigned char t9(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1000]] to i8*) +#line 1000 + x--; + return x; +} +// CHECK-LABEL: @t10( +unsigned char t10(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1100]] to i8*) +#line 1100 + ++x; + return x; +} +// CHECK-LABEL: @t11( +unsigned char t11(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1200]] to i8*) +#line 1200 + --x; + return x; +} + +// CHECK-LABEL: @t12( +signed char t12(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1300]] to i8*) +#line 1300 + x++; + return x; +} +// CHECK-LABEL: @t13( 
+signed char t13(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1400]] to i8*) +#line 1400 + x--; + return x; +} +// CHECK-LABEL: @t14( +signed char t14(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1500]] to i8*) +#line 1500 + ++x; + return x; +} +// CHECK-LABEL: @t15( +signed char t15(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1600]] to i8*) +#line 1600 + --x; + return x; +} diff --git a/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec-basics.c b/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec-basics.c new file mode 100644 index 0000000000000..b7e438c7229ce --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec-basics.c @@ -0,0 +1,139 @@ +// RUN: %clang_cc1 -fsanitize=implicit-signed-integer-truncation -fsanitize-recover=implicit-signed-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK + +// CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } +// CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } +// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } +// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 
} +// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } +// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } +// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, {{.*}}* @[[INT]], {{.*}}* @[[SIGNED_CHAR]], i8 2 } + +// CHECK-LABEL: @t0( +unsigned short t0(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*) +#line 100 + x++; + return x; +} +// CHECK-LABEL: @t1( +unsigned short t1(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*) +#line 200 + x--; + return x; +} +// CHECK-LABEL: @t2( +unsigned short t2(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* 
@[[LINE_300]] to i8*) +#line 300 + ++x; + return x; +} +// CHECK-LABEL: @t3( +unsigned short t3(unsigned short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*) +#line 400 + --x; + return x; +} + +// CHECK-LABEL: @t4( +signed short t4(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*) +#line 500 + x++; + return x; +} +// CHECK-LABEL: @t5( +signed short t5(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*) +#line 600 + x--; + return x; +} +// CHECK-LABEL: @t6( +signed short t6(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*) +#line 700 + ++x; + return x; +} +// CHECK-LABEL: @t7( +signed short t7(signed short x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*) +#line 800 + --x; + return x; +} + +// CHECK-LABEL: @t8( +unsigned char t8(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_900]] to i8*) +#line 900 + x++; + return x; +} +// CHECK-LABEL: @t9( +unsigned char t9(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1000]] to i8*) +#line 1000 + x--; + return x; +} +// CHECK-LABEL: @t10( +unsigned char t10(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1100]] to i8*) +#line 1100 + ++x; + return x; +} +// CHECK-LABEL: @t11( +unsigned char t11(unsigned char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ 
{{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1200]] to i8*) +#line 1200 + --x; + return x; +} + +// CHECK-LABEL: @t12( +signed char t12(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1300]] to i8*) +#line 1300 + x++; + return x; +} +// CHECK-LABEL: @t13( +signed char t13(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1400]] to i8*) +#line 1400 + x--; + return x; +} +// CHECK-LABEL: @t14( +signed char t14(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1500]] to i8*) +#line 1500 + ++x; + return x; +} +// CHECK-LABEL: @t15( +signed char t15(signed char x) { + // CHECK: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_1600]] to i8*) +#line 1600 + --x; + return x; +} diff --git a/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec.c b/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec.c new file mode 100644 index 0000000000000..1e0bad1844c50 --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec.c @@ -0,0 +1,303 @@ +// RUN: %clang_cc1 -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NOSANITIZE + +// RUN: %clang_cc1 -fsanitize=implicit-signed-integer-truncation -fno-sanitize-recover=implicit-signed-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-NORECOVER,CHECK-SANITIZE-UNREACHABLE +// RUN: %clang_cc1 -fsanitize=implicit-signed-integer-truncation -fsanitize-recover=implicit-signed-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s 
-implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-ANYRECOVER,CHECK-SANITIZE-RECOVER +// RUN: %clang_cc1 -fsanitize=implicit-signed-integer-truncation -fsanitize-trap=implicit-signed-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK,CHECK-SANITIZE,CHECK-SANITIZE-TRAP,CHECK-SANITIZE-UNREACHABLE + +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 11 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 10 }, {{.*}}* @[[INT]], {{.*}}* @[[SHORT]], i8 2 } + +unsigned short t0(unsigned short x) { +// CHECK-NOSANITIZE-LABEL: @t0( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* 
[[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] +// +// CHECK-SANITIZE-LABEL: @t0( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize +// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_100]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], 
!nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] +#line 100 + return x++; +} +unsigned short t1(unsigned short x) { +// CHECK-NOSANITIZE-LABEL: @t1( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] +// +// CHECK-SANITIZE-LABEL: @t1( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize +// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* 
bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_200]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] +#line 200 + return x--; +} + +unsigned short t2(unsigned short x) { +// CHECK-NOSANITIZE-LABEL: @t2( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] +// +// CHECK-SANITIZE-LABEL: @t2( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize +// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} 
!nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_300]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] +#line 300 + return ++x; +} + +unsigned short t3(unsigned short x) { +// CHECK-NOSANITIZE-LABEL: @t3( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] +// +// CHECK-SANITIZE-LABEL: @t3( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = zext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 
+// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize +// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_400]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] +#line 400 + return --x; +} + +signed short t4(signed short x) { +// CHECK-NOSANITIZE-LABEL: @t4( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 
[[X_RELOADED]] +// +// CHECK-SANITIZE-LABEL: @t4( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = sext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize +// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_500]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] +#line 500 + return x++; +} +signed short t5(signed short x) { +// CHECK-NOSANITIZE-LABEL: @t5( +// 
CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[X_RELOADED]] +// +// CHECK-SANITIZE-LABEL: @t5( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = sext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize +// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_600]] to i8*), i64 
[[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_RELOADED]] +#line 600 + return x--; +} + +signed short t6(signed short x) { +// CHECK-NOSANITIZE-LABEL: @t6( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], 1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] +// +// CHECK-SANITIZE-LABEL: @t6( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], 1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = sext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize +// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], [[INC]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = 
zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_700]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] +#line 700 + return ++x; +} + +signed short t7(signed short x) { +// CHECK-NOSANITIZE-LABEL: @t7( +// CHECK-NOSANITIZE-NEXT: entry: +// CHECK-NOSANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-NOSANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: [[INC:%.*]] = add i16 [[X_RELOADED]], -1 +// CHECK-NOSANITIZE-NEXT: store i16 [[INC]], i16* [[X_ADDR]], align 2 +// CHECK-NOSANITIZE-NEXT: ret i16 [[INC]] +// +// CHECK-SANITIZE-LABEL: @t7( +// CHECK-SANITIZE-NEXT: entry: +// CHECK-SANITIZE-NEXT: [[X_ADDR:%.*]] = alloca i16, align 2 +// CHECK-SANITIZE-NEXT: store i16 [[X:%.*]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_RELOADED:%.*]] = load i16, i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED:%.*]] = sext i16 [[X_RELOADED]] to i32 +// CHECK-SANITIZE-NEXT: [[INC:%.*]] = add i32 [[X_PROMOTED]], -1 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED:%.*]] = trunc i32 [[INC]] to i16 +// CHECK-SANITIZE-NEXT: [[X_PROMOTED_DEMOTED_PROMOTED:%.*]] = sext i16 [[X_PROMOTED_DEMOTED]] to i32, !nosanitize +// CHECK-SANITIZE-NEXT: [[TRUNCHECK:%.*]] = icmp eq i32 [[X_PROMOTED_DEMOTED_PROMOTED]], 
[[INC]], !nosanitize +// CHECK-SANITIZE-NEXT: br i1 [[TRUNCHECK]], label %[[CONT:.*]], label %[[HANDLER_IMPLICIT_X_PROMOTEDERSION:[^,]+]],{{.*}} !nosanitize +// CHECK-SANITIZE: [[HANDLER_IMPLICIT_X_PROMOTEDERSION]]: +// CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64, !nosanitize +// CHECK-SANITIZE-ANYRECOVER-NEXT: [[TMP2:%.*]] = zext i16 [[X_PROMOTED_DEMOTED]] to i64, !nosanitize +// CHECK-SANITIZE-NORECOVER-NEXT: call void @__ubsan_handle_implicit_conversion_abort(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: call void @__ubsan_handle_implicit_conversion(i8* bitcast ({ {{{.*}}}, {{{.*}}}*, {{{.*}}}*, i8 }* @[[LINE_800]] to i8*), i64 [[TMP1]], i64 [[TMP2]]) #2, !nosanitize +// CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize +// CHECK-SANITIZE-RECOVER-NEXT: br label %[[CONT]], !nosanitize +// CHECK-SANITIZE: [[CONT]]: +// CHECK-SANITIZE-NEXT: store i16 [[X_PROMOTED_DEMOTED]], i16* [[X_ADDR]], align 2 +// CHECK-SANITIZE-NEXT: ret i16 [[X_PROMOTED_DEMOTED]] +#line 800 + return --x; +} diff --git a/clang/test/CodeGen/catch-implicit-unsigned-integer-truncations-incdec-basics.c b/clang/test/CodeGen/catch-implicit-unsigned-integer-truncations-incdec-basics.c new file mode 100644 index 0000000000000..7ad12314f3df0 --- /dev/null +++ b/clang/test/CodeGen/catch-implicit-unsigned-integer-truncations-incdec-basics.c @@ -0,0 +1,101 @@ +// RUN: %clang_cc1 -fsanitize=implicit-unsigned-integer-truncation -fsanitize-recover=implicit-unsigned-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK + +// CHECK-LABEL: @t0( +unsigned short t0(unsigned short x) { +#line 100 + x++; + return x; +} +// CHECK-LABEL: @t1( +unsigned short t1(unsigned short x) { +#line 200 + 
x--; + return x; +} +// CHECK-LABEL: @t2( +unsigned short t2(unsigned short x) { +#line 300 + ++x; + return x; +} +// CHECK-LABEL: @t3( +unsigned short t3(unsigned short x) { +#line 400 + --x; + return x; +} + +// CHECK-LABEL: @t4( +signed short t4(signed short x) { +#line 500 + x++; + return x; +} +// CHECK-LABEL: @t5( +signed short t5(signed short x) { +#line 600 + x--; + return x; +} +// CHECK-LABEL: @t6( +signed short t6(signed short x) { +#line 700 + ++x; + return x; +} +// CHECK-LABEL: @t7( +signed short t7(signed short x) { +#line 800 + --x; + return x; +} + +// CHECK-LABEL: @t8( +unsigned char t8(unsigned char x) { +#line 900 + x++; + return x; +} +// CHECK-LABEL: @t9( +unsigned char t9(unsigned char x) { +#line 1000 + x--; + return x; +} +// CHECK-LABEL: @t10( +unsigned char t10(unsigned char x) { +#line 1100 + ++x; + return x; +} +// CHECK-LABEL: @t11( +unsigned char t11(unsigned char x) { +#line 1200 + --x; + return x; +} + +// CHECK-LABEL: @t12( +signed char t12(signed char x) { +#line 1300 + x++; + return x; +} +// CHECK-LABEL: @t13( +signed char t13(signed char x) { +#line 1400 + x--; + return x; +} +// CHECK-LABEL: @t14( +signed char t14(signed char x) { +#line 1500 + ++x; + return x; +} +// CHECK-LABEL: @t15( +signed char t15(signed char x) { +#line 1600 + --x; + return x; +} diff --git a/compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-conversion-incdec.c b/compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-conversion-incdec.c new file mode 100644 index 0000000000000..0e62c02d3affb --- /dev/null +++ b/compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-conversion-incdec.c @@ -0,0 +1,122 @@ +// RUN: %clang -x c -fsanitize=implicit-conversion -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-conversion -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: 
%clang -x c -fsanitize=implicit-conversion -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-conversion -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK + +// RUN: %clang -x c++ -fsanitize=implicit-conversion -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-conversion -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-conversion -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-conversion -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK + +void test_unsigned() { + unsigned char x; + + x = 0; + x++; + x = 0; + ++x; + + x = 0; + x--; + // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'unsigned char' changed the value to 255 (8-bit, unsigned) + x = 0; + --x; + // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'unsigned char' changed the value to 255 (8-bit, unsigned) + + x = 1; + x++; + x = 1; + ++x; + + x = 1; + x--; + x = 1; + --x; + + x = 254; + x++; + x = 254; + ++x; + + x = 254; + x--; + x = 254; + --x; + + x = 255; + x++; + // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value 256 (32-bit, signed) to type 'unsigned char' changed the value to 0 (8-bit, unsigned) + x = 255; + ++x; + // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 
'int' of value 256 (32-bit, signed) to type 'unsigned char' changed the value to 0 (8-bit, unsigned) + + x = 255; + x--; + x = 255; + --x; +} + +void test_signed() { + signed char x; + + x = -128; + x++; + x = -128; + ++x; + + x = -128; + x--; + // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'signed char' changed the value to 127 (8-bit, signed) + x = -128; + --x; + // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'signed char' changed the value to 127 (8-bit, signed) + + x = -1; + x++; + x = -1; + ++x; + + x = -1; + x--; + x = -1; + --x; + + x = 0; + x++; + x = 0; + ++x; + + x = 0; + x--; + x = 0; + --x; + + x = 1; + x++; + x = 1; + ++x; + + x = 1; + x--; + x = 1; + --x; + + x = 127; + x++; + // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'signed char' changed the value to -128 (8-bit, signed) + x = 127; + ++x; + // CHECK: {{.*}}integer-conversion-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'signed char' changed the value to -128 (8-bit, signed) + + x = 127; + x--; + x = 127; + --x; +} + +int main() { + test_unsigned(); + test_signed(); + + return 0; +} diff --git a/compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-sign-change-incdec.c b/compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-sign-change-incdec.c new file mode 100644 index 0000000000000..4b56a105aa289 --- /dev/null +++ b/compiler-rt/test/ubsan/TestCases/ImplicitConversion/integer-sign-change-incdec.c @@ -0,0 +1,120 @@ +// RUN: %clang -x c -fsanitize=implicit-integer-sign-change -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c 
-fsanitize=implicit-integer-sign-change -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-integer-sign-change -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-integer-sign-change -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK + +// RUN: %clang -x c++ -fsanitize=implicit-integer-sign-change -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-integer-sign-change -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-integer-sign-change -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-integer-sign-change -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK + +void test_unsigned() { + unsigned char x; + + x = 0; + x++; + x = 0; + ++x; + + x = 0; + x--; + // CHECK: {{.*}}integer-sign-change-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'unsigned char' changed the value to 255 (8-bit, unsigned) + x = 0; + --x; + // CHECK: {{.*}}integer-sign-change-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'unsigned char' changed the value to 255 (8-bit, unsigned) + + x = 1; + x++; + x = 1; + ++x; + + x = 1; + x--; + x = 1; + --x; + + x = 254; + x++; + x = 254; + ++x; + + x = 254; + x--; + x = 254; + --x; + + x = 255; + x++; + x = 255; + ++x; + + x = 255; + x--; + x = 255; + --x; +} + +void test_signed() { + signed char x; + + x = -128; + 
x++; + x = -128; + ++x; + + x = -128; + x--; + // CHECK: {{.*}}integer-sign-change-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'signed char' changed the value to 127 (8-bit, signed) + x = -128; + --x; + // CHECK: {{.*}}integer-sign-change-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'signed char' changed the value to 127 (8-bit, signed) + + x = -1; + x++; + x = -1; + ++x; + + x = -1; + x--; + x = -1; + --x; + + x = 0; + x++; + x = 0; + ++x; + + x = 0; + x--; + x = 0; + --x; + + x = 1; + x++; + x = 1; + ++x; + + x = 1; + x--; + x = 1; + --x; + + x = 127; + x++; + // CHECK: {{.*}}integer-sign-change-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'signed char' changed the value to -128 (8-bit, signed) + x = 127; + ++x; + // CHECK: {{.*}}integer-sign-change-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'signed char' changed the value to -128 (8-bit, signed) + + x = 127; + x--; + x = 127; + --x; +} + +int main() { + test_unsigned(); + test_signed(); + + return 0; +} diff --git a/compiler-rt/test/ubsan/TestCases/ImplicitConversion/signed-integer-truncation-incdec.c b/compiler-rt/test/ubsan/TestCases/ImplicitConversion/signed-integer-truncation-incdec.c new file mode 100644 index 0000000000000..4806efb24eb13 --- /dev/null +++ b/compiler-rt/test/ubsan/TestCases/ImplicitConversion/signed-integer-truncation-incdec.c @@ -0,0 +1,122 @@ +// RUN: %clang -x c -fsanitize=implicit-signed-integer-truncation -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-signed-integer-truncation -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c 
-fsanitize=implicit-signed-integer-truncation -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-signed-integer-truncation -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK + +// RUN: %clang -x c++ -fsanitize=implicit-signed-integer-truncation -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-signed-integer-truncation -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-signed-integer-truncation -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-signed-integer-truncation -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --implicit-check-not="implicit conversion" --check-prefixes=CHECK + +void test_unsigned() { + unsigned char x; + + x = 0; + x++; + x = 0; + ++x; + + x = 0; + x--; + // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'unsigned char' changed the value to 255 (8-bit, unsigned) + x = 0; + --x; + // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'unsigned char' changed the value to 255 (8-bit, unsigned) + + x = 1; + x++; + x = 1; + ++x; + + x = 1; + x--; + x = 1; + --x; + + x = 254; + x++; + x = 254; + ++x; + + x = 254; + x--; + x = 254; + --x; + + x = 255; + x++; + // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value 256 (32-bit, signed) to type 'unsigned char' changed the value to 0 (8-bit, unsigned) + x = 255; + ++x; + // 
CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value 256 (32-bit, signed) to type 'unsigned char' changed the value to 0 (8-bit, unsigned) + + x = 255; + x--; + x = 255; + --x; +} + +void test_signed() { + signed char x; + + x = -128; + x++; + x = -128; + ++x; + + x = -128; + x--; + // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'signed char' changed the value to 127 (8-bit, signed) + x = -128; + --x; + // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'signed char' changed the value to 127 (8-bit, signed) + + x = -1; + x++; + x = -1; + ++x; + + x = -1; + x--; + x = -1; + --x; + + x = 0; + x++; + x = 0; + ++x; + + x = 0; + x--; + x = 0; + --x; + + x = 1; + x++; + x = 1; + ++x; + + x = 1; + x--; + x = 1; + --x; + + x = 127; + x++; + // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:4: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'signed char' changed the value to -128 (8-bit, signed) + x = 127; + ++x; + // CHECK: {{.*}}signed-integer-truncation-incdec.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'signed char' changed the value to -128 (8-bit, signed) + + x = 127; + x--; + x = 127; + --x; +} + +int main() { + test_unsigned(); + test_signed(); + + return 0; +} From 5c5e860535d8924a3d6eb950bb8a4945df01e9b7 Mon Sep 17 00:00:00 2001 From: Gabor Horvath Date: Wed, 27 Nov 2019 10:56:36 -0800 Subject: [PATCH 130/591] [clang-tidy] Fix PR35824 Differential Revision: https://reviews.llvm.org/D46027 --- .../bugprone/SuspiciousSemicolonCheck.cpp | 3 +- ...ugprone-suspicious-semicolon-constexpr.cpp | 31 +++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) create mode 
100644 clang-tools-extra/test/clang-tidy/bugprone-suspicious-semicolon-constexpr.cpp diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousSemicolonCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SuspiciousSemicolonCheck.cpp index d94731beba945..9b34f5ab55a7f 100644 --- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousSemicolonCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousSemicolonCheck.cpp @@ -20,7 +20,8 @@ namespace bugprone { void SuspiciousSemicolonCheck::registerMatchers(MatchFinder *Finder) { Finder->addMatcher( stmt(anyOf(ifStmt(hasThen(nullStmt().bind("semi")), - unless(hasElse(stmt()))), + unless(hasElse(stmt())), + unless(isConstexpr())), forStmt(hasBody(nullStmt().bind("semi"))), cxxForRangeStmt(hasBody(nullStmt().bind("semi"))), whileStmt(hasBody(nullStmt().bind("semi"))))) diff --git a/clang-tools-extra/test/clang-tidy/bugprone-suspicious-semicolon-constexpr.cpp b/clang-tools-extra/test/clang-tidy/bugprone-suspicious-semicolon-constexpr.cpp new file mode 100644 index 0000000000000..c18dd7bd1e932 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/bugprone-suspicious-semicolon-constexpr.cpp @@ -0,0 +1,31 @@ +// RUN: %check_clang_tidy %s bugprone-suspicious-semicolon %t -- -- -std=c++17 + +void fail() +{ + int x = 0; + if(x > 5); (void)x; + // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: potentially unintended semicolon [bugprone-suspicious-semicolon] + // CHECK-FIXES: if(x > 5) (void)x; +} + +template +int foo(int a) { + if constexpr(X > 0) { + return a; + } + return a + 1; +} + +template +int foo2(int a) { + // FIXME: diagnose the case below. See https://reviews.llvm.org/D46234 + // for details. 
+ if constexpr(X > 0); + return a; + return a + 1; +} + +int main(void) { + foo2<0>(1); + return foo<0>(1); +} From 5c166f1d1969e9c1e5b72aa672add429b9c22b53 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 27 Nov 2019 13:33:11 -0500 Subject: [PATCH 131/591] [x86] make SLM extract vector element more expensive than default I'm not sure what the effect of this change will be on all of the affected tests or a larger benchmark, but it fixes the horizontal add/sub problems noted here: https://reviews.llvm.org/D59710?vs=227972&id=228095&whitespace=ignore-most#toc The costs are based on reciprocal throughput numbers in Agner's tables for PEXTR*; these appear to be very slow ops on Silvermont. This is a small step towards the larger motivation discussed in PR43605: https://bugs.llvm.org/show_bug.cgi?id=43605 Also, it seems likely that insert/extract is the source of perf regressions on other CPUs (up to 30%) that were cited as part of the reason to revert D59710, so maybe we'll extend the table-based approach to other subtargets. 
Differential Revision: https://reviews.llvm.org/D70607 --- .../lib/Target/X86/X86TargetTransformInfo.cpp | 14 + llvm/test/Analysis/CostModel/X86/fptosi.ll | 59 +- llvm/test/Analysis/CostModel/X86/fptoui.ll | 59 +- .../X86/shuffle-extract_subvector.ll | 654 ++++++++---- .../Analysis/CostModel/X86/vector-extract.ll | 680 +++++++++++-- .../LoopVectorize/X86/interleaving.ll | 12 +- .../SLPVectorizer/X86/alternate-cast.ll | 98 +- .../SLPVectorizer/X86/alternate-int.ll | 41 +- .../test/Transforms/SLPVectorizer/X86/hadd.ll | 57 +- .../test/Transforms/SLPVectorizer/X86/hsub.ll | 57 +- .../test/Transforms/SLPVectorizer/X86/sext.ll | 938 ++++++++++------- .../test/Transforms/SLPVectorizer/X86/zext.ll | 954 ++++++++++++------ 12 files changed, 2519 insertions(+), 1104 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 0b3a5319baac3..f64fedd8cbb6a 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2377,6 +2377,13 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, } int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { + static const CostTblEntry SLMCostTbl[] = { + { ISD::EXTRACT_VECTOR_ELT, MVT::i8, 4 }, + { ISD::EXTRACT_VECTOR_ELT, MVT::i16, 4 }, + { ISD::EXTRACT_VECTOR_ELT, MVT::i32, 4 }, + { ISD::EXTRACT_VECTOR_ELT, MVT::i64, 7 } + }; + assert(Val->isVectorTy() && "This must be a vector type"); Type *ScalarType = Val->getScalarType(); @@ -2396,6 +2403,13 @@ int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { // Floating point scalars are already located in index #0. 
if (ScalarType->isFloatingPointTy() && Index == 0) return 0; + + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Unexpected vector opcode"); + MVT MScalarTy = LT.second.getScalarType(); + if (ST->isSLM()) + if (auto *Entry = CostTableLookup(SLMCostTbl, ISD, MScalarTy)) + return LT.first * Entry->Cost; } // Add to the base cost if we know that the extracted element of a vector is diff --git a/llvm/test/Analysis/CostModel/X86/fptosi.ll b/llvm/test/Analysis/CostModel/X86/fptosi.ll index 7583d6e60c809..bb03b56e48f60 100644 --- a/llvm/test/Analysis/CostModel/X86/fptosi.ll +++ b/llvm/test/Analysis/CostModel/X86/fptosi.ll @@ -6,7 +6,7 @@ ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ ; -; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SLM ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2 @@ -39,6 +39,13 @@ define i32 @fptosi_double_i64(i32 %arg) { ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptosi_double_i64' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64 +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> +; SLM-NEXT: Cost Model: Found an 
estimated cost of 37 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptosi_double_i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> @@ -75,6 +82,13 @@ define i32 @fptosi_double_i32(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptosi_double_i32' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32 +; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptosi_double_i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> @@ -111,6 +125,13 @@ define i32 @fptosi_double_i16(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptosi_double_i16' +; SLM-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptosi_double_i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> @@ -147,6 +168,13 @@ define i32 @fptosi_double_i8(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptosi_double_i8' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptosi_double_i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> @@ -194,6 +222,14 @@ define i32 @fptosi_float_i64(i32 %arg) { ; AVX512DQ-NEXT: Cost Model: 
Found an estimated cost of 3 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptosi_float_i64' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64 +; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 151 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptosi_float_i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> @@ -218,6 +254,13 @@ define i32 @fptosi_float_i32(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptosi_float_i32' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 
undef +; ; BTVER2-LABEL: 'fptosi_float_i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32> @@ -254,6 +297,13 @@ define i32 @fptosi_float_i16(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptosi_float_i16' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 +; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptosi_float_i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> @@ -290,6 +340,13 @@ define i32 @fptosi_float_i8(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptosi_float_i8' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8 +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V8I8 = fptosi <8 
x float> undef to <8 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptosi_float_i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> diff --git a/llvm/test/Analysis/CostModel/X86/fptoui.ll b/llvm/test/Analysis/CostModel/X86/fptoui.ll index 078b21ba72033..cdb3e5486604f 100644 --- a/llvm/test/Analysis/CostModel/X86/fptoui.ll +++ b/llvm/test/Analysis/CostModel/X86/fptoui.ll @@ -6,7 +6,7 @@ ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ ; -; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=SLM ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 ; RUN: opt < %s -mtriple=x86_64-apple-darwin -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2 @@ -39,6 +39,13 @@ define i32 @fptoui_double_i64(i32 %arg) { ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptoui_double_i64' +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64 +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for 
instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptoui_double_i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> @@ -75,6 +82,13 @@ define i32 @fptoui_double_i32(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptoui_double_i32' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32 +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptoui_double_i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32> @@ -111,6 +125,13 @@ define i32 @fptoui_double_i16(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> ; AVX512-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptoui_double_i16' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16 +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptoui_double_i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> @@ -147,6 +168,13 @@ define i32 @fptoui_double_i8(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptoui_double_i8' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8 +; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptoui_double_i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x double> undef 
to <2 x i8> @@ -194,6 +222,14 @@ define i32 @fptoui_float_i64(i32 %arg) { ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptoui_float_i64' +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64 +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptoui_float_i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64> @@ -232,6 +268,13 @@ define i32 @fptoui_float_i32(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptoui_float_i32' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32 +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V16I32 = fptoui <16 
x float> undef to <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptoui_float_i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32> @@ -268,6 +311,13 @@ define i32 @fptoui_float_i16(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptoui_float_i16' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16 +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptoui_float_i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> @@ -304,6 +354,13 @@ define i32 @fptoui_float_i8(i32 %arg) { ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; +; SLM-LABEL: 'fptoui_float_i8' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8 +; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I8 = fptoui <4 x 
float> undef to <4 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; ; BTVER2-LABEL: 'fptoui_float_i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll index 3ceba32744b6b..4ed509ff9db09 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll @@ -8,8 +8,8 @@ ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+avx512f,+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW ; -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42,SLM +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42,GLM ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2 ; @@ -270,64 +270,123 @@ define void @test_vXi32(<4 x i32> %src128, <8 x i32> 
%src256, <16 x i32> %src512 } define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src512) { -; SSE-LABEL: 'test_vXi16' -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> 
undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> -; SSE-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> 
undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSE2-LABEL: 'test_vXi16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> 
undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 
for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> 
undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_vXi16' ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> 
undef, <2 x i32> @@ -506,6 +565,124 @@ define void @test_vXi16(<4 x i16> %src64, <8 x i16> %src128, <16 x i16> %src256, ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; +; SLM-LABEL: 'test_vXi16' +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; 
SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x 
i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 
0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; GLM-LABEL: 'test_vXi16' +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <8 x i16> %src128, <8 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V128_4567 = shufflevector <8 x i16> %src128, <8 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_2345 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_6789 = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <32 x i16> %src512, <32 x 
i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19 = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_02_03_04_05 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_06_07_08_09 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i16> %src512, <32 x i16> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; ; BTVER2-LABEL: 'test_vXi16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> undef, <2 x i32> @@ -863,125 +1040,6 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; SSE42-LABEL: 'test_vXi8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; SSE42-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x 
i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an 
estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x 
i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x 
i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x 
i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector 
<64 x i8> %src512, <64 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void -; ; AVX-LABEL: 'test_vXi8' ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> @@ -1339,6 +1397,244 @@ define void @test_vXi8(<8 x i8> %src64, <16 x i8> %src128, <32 x i8> %src256, <6 ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; +; SLM-LABEL: 'test_vXi8' +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; 
SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19 = shufflevector <32 x i8> 
%src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; SLM-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> 
%src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; GLM-LABEL: 'test_vXi8' +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_45 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_67 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_0123 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_4567 = shufflevector <8 x i8> %src64, <8 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_45 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; 
GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_67 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89 = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CD = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_EF = shufflevector <16 x i8> %src128, <16 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_0123 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_2345 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_4567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128_6789 = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89AB = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_CDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V128_01234567 = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_89ABCDEF = shufflevector <16 x i8> %src128, <16 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01 = shufflevector <32 x i8> %src256, <32 x i8> 
undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19 = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D = 
shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_02_03_04_05 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256_06_07_08_09 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 
x i8> %src256, <32 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17 = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <32 x i8> %src256, <32 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11 = 
shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %V512_2C_2D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39 = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <2 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> 
+; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <4 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V512_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37 = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <8 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <16 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%V512_00_01_02_03_04_05_06_07_08_09_0A_0B_0C_0D_0E_0F_10_11_12_13_14_15_16_17_18_19_1A_1B_1C_1D_1E_1F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V512_20_21_22_23_24_25_26_27_28_29_2A_2B_2C_2D_2E_2F_30_31_32_33_34_35_36_37_38_39_3A_3B_3C_3D_3E_3F = shufflevector <64 x i8> %src512, <64 x i8> undef, <32 x i32> +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; ; BTVER2-LABEL: 'test_vXi8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_23 = shufflevector <8 x i8> %src64, <8 x i8> undef, <2 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/vector-extract.ll b/llvm/test/Analysis/CostModel/X86/vector-extract.ll index 62123c422a8f5..ddb3654fbc6ab 100644 --- a/llvm/test/Analysis/CostModel/X86/vector-extract.ll +++ b/llvm/test/Analysis/CostModel/X86/vector-extract.ll @@ -9,8 +9,8 @@ ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW ; -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 -; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42 +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SSE,SSE42,SLM +; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,SSE,SSE42,GLM ; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -cost-model -analyze 
-mcpu=btver2 | FileCheck %s --check-prefixes=BTVER2 define i32 @extract_double(i32 %arg) { @@ -188,19 +188,117 @@ define i32 @extract_float(i32 %arg) { } define i32 @extract_i64(i32 %arg) { -; CHECK-LABEL: 'extract_i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE2-LABEL: 'extract_i64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE3-LABEL: 'extract_i64' +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> 
undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSSE3-LABEL: 'extract_i64' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 +; SSSE3-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE41-LABEL: 'extract_i64' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX-LABEL: 'extract_i64' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v4i64_a = extractelement <4 x i64> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512-LABEL: 'extract_i64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> 
undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SLM-LABEL: 'extract_i64' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 +; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; GLM-LABEL: 'extract_i64' +; GLM-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = extractelement <2 x i64> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = extractelement <2 x i64> undef, i32 1 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_a = extractelement <4 x i64> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_0 = extractelement <4 x i64> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_3 = extractelement <4 x i64> undef, i32 3 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_a = extractelement <8 x i64> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_0 = extractelement <8 x i64> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_3 = extractelement <8 x i64> undef, i32 3 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_4 = extractelement <8 x i64> undef, i32 4 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_7 = extractelement <8 x i64> undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'extract_i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_a = extractelement <2 x i64> undef, i32 %arg @@ -234,24 +332,157 @@ define i32 @extract_i64(i32 %arg) { } define i32 @extract_i32(i32 %arg) { -; CHECK-LABEL: 'extract_i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement 
<2 x i32> undef, i32 1 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE2-LABEL: 'extract_i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE3-LABEL: 'extract_i32' +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSSE3-LABEL: 'extract_i32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = 
extractelement <2 x i32> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE41-LABEL: 
'extract_i32' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 +; 
SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX-LABEL: 'extract_i32' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512-LABEL: 'extract_i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v16i32_8 = extractelement <16 x i32> undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SLM-LABEL: 'extract_i32' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 +; 
SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; GLM-LABEL: 'extract_i32' +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = extractelement <2 x i32> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = extractelement <2 x i32> undef, i32 1 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_a = extractelement <4 x i32> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = extractelement <4 x i32> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = extractelement <4 x i32> undef, i32 3 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_a = extractelement <8 x i32> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = extractelement <8 x i32> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_3 = extractelement <8 x i32> undef, i32 3 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_4 = extractelement <8 x i32> undef, i32 4 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_7 = extractelement <8 x i32> undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_a = extractelement <16 x i32> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_0 = extractelement <16 x i32> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v16i32_3 = extractelement <16 x i32> undef, i32 3 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_8 = extractelement <16 x i32> undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_15 = extractelement <16 x i32> undef, i32 15 +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'extract_i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_a = extractelement <2 x i32> undef, i32 %arg @@ -296,24 +527,157 @@ define i32 @extract_i32(i32 %arg) { } define i32 @extract_i16(i32 %arg) { -; CHECK-LABEL: 'extract_i16' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v32i16_7 = extractelement <32 x i16> undef, i32 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE2-LABEL: 'extract_i16' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = 
extractelement <32 x i16> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE3-LABEL: 'extract_i16' +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> 
undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSSE3-LABEL: 'extract_i16' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE41-LABEL: 'extract_i16' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 +; 
SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX-LABEL: 'extract_i16' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 +; AVX-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512-LABEL: 'extract_i16' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SLM-LABEL: 'extract_i16' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = extractelement <16 x i16> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an 
estimated cost of 8 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; GLM-LABEL: 'extract_i16' +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = extractelement <8 x i16> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = extractelement <8 x i16> undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v16i16_a = extractelement <16 x i16> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = extractelement <16 x i16> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = extractelement <16 x i16> undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = extractelement <16 x i16> undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = extractelement <16 x i16> undef, i32 15 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = extractelement <32 x i16> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = extractelement <32 x i16> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = extractelement <32 x i16> undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = extractelement <32 x i16> undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = extractelement <32 x i16> undef, i32 15 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = extractelement <32 x i16> undef, i32 16 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = extractelement <32 x i16> undef, i32 24 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = extractelement <32 x i16> undef, i32 31 +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'extract_i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = extractelement <8 x i16> undef, i32 %arg @@ -357,29 +721,197 @@ define i32 @extract_i16(i32 %arg) { } define i32 @extract_i8(i32 %arg) { -; CHECK-LABEL: 'extract_i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement 
<16 x i8> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 -; 
CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; SSE2-LABEL: 'extract_i8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 +; SSE2-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE3-LABEL: 'extract_i8' +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = 
extractelement <32 x i8> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 +; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSSE3-LABEL: 'extract_i8' +; 
SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 +; SSSE3-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SSE41-LABEL: 'extract_i8' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX-LABEL: 'extract_i8' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = 
extractelement <32 x i8> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 +; AVX-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512-LABEL: 'extract_i8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 +; AVX512-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; SLM-LABEL: 'extract_i8' +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: 
%v32i8_24 = extractelement <32 x i8> undef, i32 24 +; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 +; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 +; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 +; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; GLM-LABEL: 'extract_i8' +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = extractelement <16 x i8> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = extractelement <16 x i8> undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = extractelement <16 x i8> undef, i32 15 +; 
GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = extractelement <32 x i8> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = extractelement <32 x i8> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = extractelement <32 x i8> undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = extractelement <32 x i8> undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = extractelement <32 x i8> undef, i32 15 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = extractelement <32 x i8> undef, i32 24 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = extractelement <32 x i8> undef, i32 31 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = extractelement <64 x i8> undef, i32 %arg +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = extractelement <64 x i8> undef, i32 0 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = extractelement <64 x i8> undef, i32 7 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = extractelement <64 x i8> undef, i32 8 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = extractelement <64 x i8> undef, i32 15 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = extractelement <64 x i8> undef, i32 24 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = extractelement <64 x i8> undef, i32 31 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = extractelement <64 x i8> undef, i32 32 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = extractelement <64 x i8> undef, i32 48 +; GLM-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v64i8_63 = extractelement <64 x i8> undef, i32 63 +; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'extract_i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = extractelement <16 x i8> undef, i32 %arg diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleaving.ll b/llvm/test/Transforms/LoopVectorize/X86/interleaving.ll index 9294c92b5759f..f12f35702156f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleaving.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleaving.ll @@ -1,6 +1,6 @@ ; RUN: opt -S -mtriple=x86_64-pc_linux -loop-vectorize -instcombine < %s | FileCheck %s --check-prefix=NORMAL -; RUN: opt -S -mtriple=x86_64-pc_linux -loop-vectorize -instcombine -mcpu=slm < %s | FileCheck %s --check-prefix=NORMAL -; RUN: opt -S -mtriple=x86_64-pc_linux -loop-vectorize -instcombine -mcpu=atom < %s | FileCheck %s --check-prefix=ATOM +; RUN: opt -S -mtriple=x86_64-pc_linux -loop-vectorize -instcombine -mcpu=slm < %s | FileCheck %s --check-prefix=SLOW +; RUN: opt -S -mtriple=x86_64-pc_linux -loop-vectorize -instcombine -mcpu=atom < %s | FileCheck %s --check-prefix=SLOW ; NORMAL-LABEL: foo ; NORMAL: %[[WIDE:.*]] = load <8 x i32>, <8 x i32>* %{{.*}}, align 4 @@ -8,10 +8,10 @@ ; NORMAL: %[[STRIDED2:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> ; NORMAL: add nsw <4 x i32> %[[STRIDED2]], %[[STRIDED1]] -; ATOM-LABEL: foo -; ATOM: load i32 -; ATOM: load i32 -; ATOM: store i32 +; SLOW-LABEL: foo +; SLOW: load i32 +; SLOW: load i32 +; SLOW: store i32 define void @foo(i32* noalias nocapture %a, i32* noalias nocapture readonly %b) { entry: br label %for.body diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll index 8f8b1d443da84..9ee016e4331b8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll +++ 
b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll @@ -35,30 +35,9 @@ define <8 x float> @sitofp_uitofp(<8 x i32> %a) { ; SSE-NEXT: ret <8 x float> [[R7]] ; ; SLM-LABEL: @sitofp_uitofp( -; SLM-NEXT: [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0 -; SLM-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1 -; SLM-NEXT: [[A2:%.*]] = extractelement <8 x i32> [[A]], i32 2 -; SLM-NEXT: [[A3:%.*]] = extractelement <8 x i32> [[A]], i32 3 -; SLM-NEXT: [[A4:%.*]] = extractelement <8 x i32> [[A]], i32 4 -; SLM-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5 -; SLM-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6 -; SLM-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7 -; SLM-NEXT: [[AB0:%.*]] = sitofp i32 [[A0]] to float -; SLM-NEXT: [[AB1:%.*]] = sitofp i32 [[A1]] to float -; SLM-NEXT: [[AB2:%.*]] = sitofp i32 [[A2]] to float -; SLM-NEXT: [[AB3:%.*]] = sitofp i32 [[A3]] to float -; SLM-NEXT: [[AB4:%.*]] = uitofp i32 [[A4]] to float -; SLM-NEXT: [[AB5:%.*]] = uitofp i32 [[A5]] to float -; SLM-NEXT: [[AB6:%.*]] = uitofp i32 [[A6]] to float -; SLM-NEXT: [[AB7:%.*]] = uitofp i32 [[A7]] to float -; SLM-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i32 0 -; SLM-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i32 1 -; SLM-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i32 2 -; SLM-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[AB3]], i32 3 -; SLM-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4 -; SLM-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5 -; SLM-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6 -; SLM-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7 +; SLM-NEXT: [[TMP1:%.*]] = sitofp <8 x i32> [[A:%.*]] to <8 x float> +; SLM-NEXT: [[TMP2:%.*]] = uitofp <8 x i32> [[A]] to <8 x float> +; SLM-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> 
[[TMP2]], <8 x i32> ; SLM-NEXT: ret <8 x float> [[R7]] ; ; AVX-LABEL: @sitofp_uitofp( @@ -268,11 +247,50 @@ define <8 x float> @fneg_fabs(<8 x float> %a) { } define <8 x i32> @sext_zext(<8 x i16> %a) { -; CHECK-LABEL: @sext_zext( -; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i16> [[A:%.*]] to <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[A]] to <8 x i32> -; CHECK-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> -; CHECK-NEXT: ret <8 x i32> [[R7]] +; SSE-LABEL: @sext_zext( +; SSE-NEXT: [[TMP1:%.*]] = sext <8 x i16> [[A:%.*]] to <8 x i32> +; SSE-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[A]] to <8 x i32> +; SSE-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> +; SSE-NEXT: ret <8 x i32> [[R7]] +; +; SLM-LABEL: @sext_zext( +; SLM-NEXT: [[A0:%.*]] = extractelement <8 x i16> [[A:%.*]], i32 0 +; SLM-NEXT: [[A1:%.*]] = extractelement <8 x i16> [[A]], i32 1 +; SLM-NEXT: [[A2:%.*]] = extractelement <8 x i16> [[A]], i32 2 +; SLM-NEXT: [[A3:%.*]] = extractelement <8 x i16> [[A]], i32 3 +; SLM-NEXT: [[A4:%.*]] = extractelement <8 x i16> [[A]], i32 4 +; SLM-NEXT: [[A5:%.*]] = extractelement <8 x i16> [[A]], i32 5 +; SLM-NEXT: [[A6:%.*]] = extractelement <8 x i16> [[A]], i32 6 +; SLM-NEXT: [[A7:%.*]] = extractelement <8 x i16> [[A]], i32 7 +; SLM-NEXT: [[AB0:%.*]] = sext i16 [[A0]] to i32 +; SLM-NEXT: [[AB1:%.*]] = sext i16 [[A1]] to i32 +; SLM-NEXT: [[AB2:%.*]] = sext i16 [[A2]] to i32 +; SLM-NEXT: [[AB3:%.*]] = sext i16 [[A3]] to i32 +; SLM-NEXT: [[AB4:%.*]] = zext i16 [[A4]] to i32 +; SLM-NEXT: [[AB5:%.*]] = zext i16 [[A5]] to i32 +; SLM-NEXT: [[AB6:%.*]] = zext i16 [[A6]] to i32 +; SLM-NEXT: [[AB7:%.*]] = zext i16 [[A7]] to i32 +; SLM-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0 +; SLM-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1 +; SLM-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2 +; SLM-NEXT: [[R3:%.*]] = insertelement <8 x i32> 
[[R2]], i32 [[AB3]], i32 3 +; SLM-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4 +; SLM-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5 +; SLM-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6 +; SLM-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7 +; SLM-NEXT: ret <8 x i32> [[R7]] +; +; AVX-LABEL: @sext_zext( +; AVX-NEXT: [[TMP1:%.*]] = sext <8 x i16> [[A:%.*]] to <8 x i32> +; AVX-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[A]] to <8 x i32> +; AVX-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> +; AVX-NEXT: ret <8 x i32> [[R7]] +; +; AVX512-LABEL: @sext_zext( +; AVX512-NEXT: [[TMP1:%.*]] = sext <8 x i16> [[A:%.*]] to <8 x i32> +; AVX512-NEXT: [[TMP2:%.*]] = zext <8 x i16> [[A]] to <8 x i32> +; AVX512-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> +; AVX512-NEXT: ret <8 x i32> [[R7]] ; %a0 = extractelement <8 x i16> %a, i32 0 %a1 = extractelement <8 x i16> %a, i32 1 @@ -383,26 +401,24 @@ define <8 x float> @sitofp_uitofp_4i32_8i16_16i8(<4 x i32> %a, <8 x i16> %b, <16 ; SSE-NEXT: ret <8 x float> [[R7]] ; ; SLM-LABEL: @sitofp_uitofp_4i32_8i16_16i8( -; SLM-NEXT: [[A0:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 0 -; SLM-NEXT: [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1 -; SLM-NEXT: [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2 -; SLM-NEXT: [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3 ; SLM-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0 ; SLM-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1 ; SLM-NEXT: [[C0:%.*]] = extractelement <16 x i8> [[C:%.*]], i32 0 ; SLM-NEXT: [[C1:%.*]] = extractelement <16 x i8> [[C]], i32 1 -; SLM-NEXT: [[AB0:%.*]] = sitofp i32 [[A0]] to float -; SLM-NEXT: [[AB1:%.*]] = sitofp i32 [[A1]] to float -; SLM-NEXT: [[AB2:%.*]] = uitofp i32 [[A2]] to float -; SLM-NEXT: [[AB3:%.*]] = uitofp i32 [[A3]] to float +; SLM-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> 
[[A:%.*]] to <4 x float> +; SLM-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float> ; SLM-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float ; SLM-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float ; SLM-NEXT: [[AB6:%.*]] = sitofp i8 [[C0]] to float ; SLM-NEXT: [[AB7:%.*]] = uitofp i8 [[C1]] to float -; SLM-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i32 0 -; SLM-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i32 1 -; SLM-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i32 2 -; SLM-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[AB3]], i32 3 +; SLM-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 0 +; SLM-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[TMP3]], i32 0 +; SLM-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 1 +; SLM-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[TMP4]], i32 1 +; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2 +; SLM-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[TMP5]], i32 2 +; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3 +; SLM-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[TMP6]], i32 3 ; SLM-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4 ; SLM-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5 ; SLM-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll index 44729b4a8d5a2..23d1634fdb6ce 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll @@ -75,42 +75,11 @@ define <4 x i32> @add_and_v4i32(<4 x i32> %a, <4 x i32> %b) { } define <4 x i32> @add_mul_v4i32(<4 x i32> %a, <4 x i32> %b) { -; SSE-LABEL: @add_mul_v4i32( -; SSE-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[A:%.*]], 
[[B:%.*]] -; SSE-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A]], [[B]] -; SSE-NEXT: [[R3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> -; SSE-NEXT: ret <4 x i32> [[R3]] -; -; SLM-LABEL: @add_mul_v4i32( -; SLM-NEXT: [[A0:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 0 -; SLM-NEXT: [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1 -; SLM-NEXT: [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2 -; SLM-NEXT: [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3 -; SLM-NEXT: [[B0:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 0 -; SLM-NEXT: [[B1:%.*]] = extractelement <4 x i32> [[B]], i32 1 -; SLM-NEXT: [[B2:%.*]] = extractelement <4 x i32> [[B]], i32 2 -; SLM-NEXT: [[B3:%.*]] = extractelement <4 x i32> [[B]], i32 3 -; SLM-NEXT: [[AB0:%.*]] = mul i32 [[A0]], [[B0]] -; SLM-NEXT: [[AB1:%.*]] = add i32 [[A1]], [[B1]] -; SLM-NEXT: [[AB2:%.*]] = add i32 [[A2]], [[B2]] -; SLM-NEXT: [[AB3:%.*]] = mul i32 [[A3]], [[B3]] -; SLM-NEXT: [[R0:%.*]] = insertelement <4 x i32> undef, i32 [[AB0]], i32 0 -; SLM-NEXT: [[R1:%.*]] = insertelement <4 x i32> [[R0]], i32 [[AB1]], i32 1 -; SLM-NEXT: [[R2:%.*]] = insertelement <4 x i32> [[R1]], i32 [[AB2]], i32 2 -; SLM-NEXT: [[R3:%.*]] = insertelement <4 x i32> [[R2]], i32 [[AB3]], i32 3 -; SLM-NEXT: ret <4 x i32> [[R3]] -; -; AVX-LABEL: @add_mul_v4i32( -; AVX-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]] -; AVX-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A]], [[B]] -; AVX-NEXT: [[R3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> -; AVX-NEXT: ret <4 x i32> [[R3]] -; -; AVX512-LABEL: @add_mul_v4i32( -; AVX512-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]] -; AVX512-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A]], [[B]] -; AVX512-NEXT: [[R3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> -; AVX512-NEXT: ret <4 x i32> [[R3]] +; CHECK-LABEL: @add_mul_v4i32( +; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A]], 
[[B]] +; CHECK-NEXT: [[R3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[R3]] ; %a0 = extractelement <4 x i32> %a, i32 0 %a1 = extractelement <4 x i32> %a, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll index b02244f9614bf..71f72a93075b4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll @@ -78,34 +78,11 @@ define <4 x float> @test_v4f32(<4 x float> %a, <4 x float> %b) { } define <2 x i64> @test_v2i64(<2 x i64> %a, <2 x i64> %b) { -; SSE-LABEL: @test_v2i64( -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], <2 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> -; SSE-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] -; SSE-NEXT: ret <2 x i64> [[TMP3]] -; -; SLM-LABEL: @test_v2i64( -; SLM-NEXT: [[A0:%.*]] = extractelement <2 x i64> [[A:%.*]], i32 0 -; SLM-NEXT: [[A1:%.*]] = extractelement <2 x i64> [[A]], i32 1 -; SLM-NEXT: [[B0:%.*]] = extractelement <2 x i64> [[B:%.*]], i32 0 -; SLM-NEXT: [[B1:%.*]] = extractelement <2 x i64> [[B]], i32 1 -; SLM-NEXT: [[R0:%.*]] = add i64 [[A0]], [[A1]] -; SLM-NEXT: [[R1:%.*]] = add i64 [[B0]], [[B1]] -; SLM-NEXT: [[R00:%.*]] = insertelement <2 x i64> undef, i64 [[R0]], i32 0 -; SLM-NEXT: [[R01:%.*]] = insertelement <2 x i64> [[R00]], i64 [[R1]], i32 1 -; SLM-NEXT: ret <2 x i64> [[R01]] -; -; AVX-LABEL: @test_v2i64( -; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], <2 x i32> -; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> -; AVX-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] -; AVX-NEXT: ret <2 x i64> [[TMP3]] -; -; AVX512-LABEL: @test_v2i64( -; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], <2 x i32> -; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> 
[[A]], <2 x i64> [[B]], <2 x i32> -; AVX512-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] -; AVX512-NEXT: ret <2 x i64> [[TMP3]] +; CHECK-LABEL: @test_v2i64( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %a0 = extractelement <2 x i64> %a, i32 0 %a1 = extractelement <2 x i64> %a, i32 1 @@ -322,14 +299,10 @@ define <4 x i64> @test_v4i64(<4 x i64> %a, <4 x i64> %b) { ; SSE-NEXT: ret <4 x i64> [[R03]] ; ; SLM-LABEL: @test_v4i64( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <2 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> -; SLM-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] -; SLM-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> -; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> -; SLM-NEXT: [[TMP6:%.*]] = add <2 x i64> [[TMP4]], [[TMP5]] -; SLM-NEXT: [[R03:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP6]], <4 x i32> -; SLM-NEXT: ret <4 x i64> [[R03]] +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> +; SLM-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]] +; SLM-NEXT: ret <4 x i64> [[TMP3]] ; ; AVX-LABEL: @test_v4i64( ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i32> @@ -374,14 +347,10 @@ define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE-NEXT: ret <8 x i32> [[R07]] ; ; SLM-LABEL: @test_v8i32( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <4 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> 
-; SLM-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]] -; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> -; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> -; SLM-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP4]], [[TMP5]] -; SLM-NEXT: [[R07:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP6]], <8 x i32> -; SLM-NEXT: ret <8 x i32> [[R07]] +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <8 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> +; SLM-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP1]], [[TMP2]] +; SLM-NEXT: ret <8 x i32> [[TMP3]] ; ; AVX-LABEL: @test_v8i32( ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <8 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll b/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll index d6e44aa1d6a04..b7e487eed9eb9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll @@ -78,34 +78,11 @@ define <4 x float> @test_v4f32(<4 x float> %a, <4 x float> %b) { } define <2 x i64> @test_v2i64(<2 x i64> %a, <2 x i64> %b) { -; SSE-LABEL: @test_v2i64( -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], <2 x i32> -; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> -; SSE-NEXT: [[TMP3:%.*]] = sub <2 x i64> [[TMP1]], [[TMP2]] -; SSE-NEXT: ret <2 x i64> [[TMP3]] -; -; SLM-LABEL: @test_v2i64( -; SLM-NEXT: [[A0:%.*]] = extractelement <2 x i64> [[A:%.*]], i32 0 -; SLM-NEXT: [[A1:%.*]] = extractelement <2 x i64> [[A]], i32 1 -; SLM-NEXT: [[B0:%.*]] = extractelement <2 x i64> [[B:%.*]], i32 0 -; SLM-NEXT: [[B1:%.*]] = extractelement <2 x i64> [[B]], i32 1 -; SLM-NEXT: [[R0:%.*]] = sub i64 [[A0]], [[A1]] -; SLM-NEXT: [[R1:%.*]] = sub i64 [[B0]], [[B1]] -; SLM-NEXT: [[R00:%.*]] = insertelement <2 x i64> undef, i64 [[R0]], i32 0 -; 
SLM-NEXT: [[R01:%.*]] = insertelement <2 x i64> [[R00]], i64 [[R1]], i32 1 -; SLM-NEXT: ret <2 x i64> [[R01]] -; -; AVX-LABEL: @test_v2i64( -; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], <2 x i32> -; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> -; AVX-NEXT: [[TMP3:%.*]] = sub <2 x i64> [[TMP1]], [[TMP2]] -; AVX-NEXT: ret <2 x i64> [[TMP3]] -; -; AVX512-LABEL: @test_v2i64( -; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], <2 x i32> -; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> -; AVX512-NEXT: [[TMP3:%.*]] = sub <2 x i64> [[TMP1]], [[TMP2]] -; AVX512-NEXT: ret <2 x i64> [[TMP3]] +; CHECK-LABEL: @test_v2i64( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %a0 = extractelement <2 x i64> %a, i32 0 %a1 = extractelement <2 x i64> %a, i32 1 @@ -322,14 +299,10 @@ define <4 x i64> @test_v4i64(<4 x i64> %a, <4 x i64> %b) { ; SSE-NEXT: ret <4 x i64> [[R03]] ; ; SLM-LABEL: @test_v4i64( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <2 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> -; SLM-NEXT: [[TMP3:%.*]] = sub <2 x i64> [[TMP1]], [[TMP2]] -; SLM-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> -; SLM-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <2 x i32> -; SLM-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[TMP4]], [[TMP5]] -; SLM-NEXT: [[R03:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP6]], <4 x i32> -; SLM-NEXT: ret <4 x i64> [[R03]] +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i32> +; SLM-NEXT: [[TMP2:%.*]] 
= shufflevector <4 x i64> [[A]], <4 x i64> [[B]], <4 x i32> +; SLM-NEXT: [[TMP3:%.*]] = sub <4 x i64> [[TMP1]], [[TMP2]] +; SLM-NEXT: ret <4 x i64> [[TMP3]] ; ; AVX-LABEL: @test_v4i64( ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i32> @@ -374,14 +347,10 @@ define <8 x i32> @test_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE-NEXT: ret <8 x i32> [[R07]] ; ; SLM-LABEL: @test_v8i32( -; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <4 x i32> -; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> -; SLM-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP1]], [[TMP2]] -; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> -; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <4 x i32> -; SLM-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP4]], [[TMP5]] -; SLM-NEXT: [[R07:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP6]], <8 x i32> -; SLM-NEXT: ret <8 x i32> [[R07]] +; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <8 x i32> +; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> [[B]], <8 x i32> +; SLM-NEXT: [[TMP3:%.*]] = sub <8 x i32> [[TMP1]], [[TMP2]] +; SLM-NEXT: ret <8 x i32> [[TMP3]] ; ; AVX-LABEL: @test_v8i32( ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <8 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sext.ll b/llvm/test/Transforms/SLPVectorizer/X86/sext.ll index f3404831e213f..c3eba4701e996 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/sext.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/sext.ll @@ -11,26 +11,15 @@ ; define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) { -; SSE2-LABEL: @loadext_2i8_to_2i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1 -; SSE2-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1 -; SSE2-NEXT: [[X0:%.*]] = 
sext i8 [[I0]] to i64 -; SSE2-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i64 -; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0 -; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1 -; SSE2-NEXT: ret <2 x i64> [[V1]] -; -; SLM-LABEL: @loadext_2i8_to_2i64( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>* -; SLM-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1 -; SLM-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64> -; SLM-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 -; SLM-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0 -; SLM-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 -; SLM-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1 -; SLM-NEXT: ret <2 x i64> [[V1]] +; SSE-LABEL: @loadext_2i8_to_2i64( +; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; SSE-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1 +; SSE-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1 +; SSE-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i64 +; SSE-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i64 +; SSE-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0 +; SSE-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1 +; SSE-NEXT: ret <2 x i64> [[V1]] ; ; AVX-LABEL: @loadext_2i8_to_2i64( ; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 @@ -54,40 +43,23 @@ define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) { } define <4 x i32> @loadext_4i8_to_4i32(i8* %p0) { -; SSE2-LABEL: @loadext_4i8_to_4i32( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SSE2-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1 -; SSE2-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1 -; 
SSE2-NEXT: [[I2:%.*]] = load i8, i8* [[P2]], align 1 -; SSE2-NEXT: [[I3:%.*]] = load i8, i8* [[P3]], align 1 -; SSE2-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i32 -; SSE2-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i32 -; SSE2-NEXT: [[X2:%.*]] = sext i8 [[I2]] to i32 -; SSE2-NEXT: [[X3:%.*]] = sext i8 [[I3]] to i32 -; SSE2-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[X0]], i32 0 -; SSE2-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[X1]], i32 1 -; SSE2-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[X2]], i32 2 -; SSE2-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[X3]], i32 3 -; SSE2-NEXT: ret <4 x i32> [[V3]] -; -; SLM-LABEL: @loadext_4i8_to_4i32( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* -; SLM-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 -; SLM-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32> -; SLM-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0 -; SLM-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0 -; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1 -; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[TMP5]], i32 1 -; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2 -; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[TMP6]], i32 2 -; SLM-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 -; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[TMP7]], i32 3 -; SLM-NEXT: ret <4 x i32> [[V3]] +; SSE-LABEL: @loadext_4i8_to_4i32( +; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 +; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 +; SSE-NEXT: [[I0:%.*]] = 
load i8, i8* [[P0]], align 1 +; SSE-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1 +; SSE-NEXT: [[I2:%.*]] = load i8, i8* [[P2]], align 1 +; SSE-NEXT: [[I3:%.*]] = load i8, i8* [[P3]], align 1 +; SSE-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i32 +; SSE-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i32 +; SSE-NEXT: [[X2:%.*]] = sext i8 [[I2]] to i32 +; SSE-NEXT: [[X3:%.*]] = sext i8 [[I3]] to i32 +; SSE-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[X0]], i32 0 +; SSE-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[X1]], i32 1 +; SSE-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[X2]], i32 2 +; SSE-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[X3]], i32 3 +; SSE-NEXT: ret <4 x i32> [[V3]] ; ; AVX-LABEL: @loadext_4i8_to_4i32( ; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 @@ -125,40 +97,23 @@ define <4 x i32> @loadext_4i8_to_4i32(i8* %p0) { } define <4 x i64> @loadext_4i8_to_4i64(i8* %p0) { -; SSE2-LABEL: @loadext_4i8_to_4i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SSE2-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1 -; SSE2-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1 -; SSE2-NEXT: [[I2:%.*]] = load i8, i8* [[P2]], align 1 -; SSE2-NEXT: [[I3:%.*]] = load i8, i8* [[P3]], align 1 -; SSE2-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i64 -; SSE2-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i64 -; SSE2-NEXT: [[X2:%.*]] = sext i8 [[I2]] to i64 -; SSE2-NEXT: [[X3:%.*]] = sext i8 [[I3]] to i64 -; SSE2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0 -; SSE2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1 -; SSE2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2 -; SSE2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3 -; SSE2-NEXT: ret <4 x i64> [[V3]] -; -; SLM-LABEL: 
@loadext_4i8_to_4i64( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* -; SLM-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 -; SLM-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i64> -; SLM-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 -; SLM-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0 -; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1 -; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1 -; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2 -; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2 -; SLM-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3 -; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3 -; SLM-NEXT: ret <4 x i64> [[V3]] +; SSE-LABEL: @loadext_4i8_to_4i64( +; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 +; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 +; SSE-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1 +; SSE-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1 +; SSE-NEXT: [[I2:%.*]] = load i8, i8* [[P2]], align 1 +; SSE-NEXT: [[I3:%.*]] = load i8, i8* [[P3]], align 1 +; SSE-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i64 +; SSE-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i64 +; SSE-NEXT: [[X2:%.*]] = sext i8 [[I2]] to i64 +; SSE-NEXT: [[X3:%.*]] = sext i8 [[I3]] to i64 +; SSE-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0 +; SSE-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1 +; SSE-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2 +; SSE-NEXT: [[V3:%.*]] = insertelement <4 x i64> 
[[V2]], i64 [[X3]], i32 3 +; SSE-NEXT: ret <4 x i64> [[V3]] ; ; AVX1-LABEL: @loadext_4i8_to_4i64( ; AVX1-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 @@ -232,34 +187,97 @@ define <4 x i64> @loadext_4i8_to_4i64(i8* %p0) { } define <8 x i16> @loadext_8i8_to_8i16(i8* %p0) { -; CHECK-LABEL: @loadext_8i8_to_8i16( -; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0 -; CHECK-NEXT: [[V0:%.*]] = insertelement <8 x i16> undef, i16 [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i16> [[TMP3]], i32 1 -; CHECK-NEXT: [[V1:%.*]] = insertelement <8 x i16> [[V0]], i16 [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i16> [[TMP3]], i32 2 -; CHECK-NEXT: [[V2:%.*]] = insertelement <8 x i16> [[V1]], i16 [[TMP6]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3 -; CHECK-NEXT: [[V3:%.*]] = insertelement <8 x i16> [[V2]], i16 [[TMP7]], i32 3 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i16> [[TMP3]], i32 4 -; CHECK-NEXT: [[V4:%.*]] = insertelement <8 x i16> [[V3]], i16 [[TMP8]], i32 4 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i16> [[TMP3]], i32 5 -; CHECK-NEXT: [[V5:%.*]] = insertelement <8 x i16> [[V4]], i16 [[TMP9]], i32 5 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement 
<8 x i16> [[TMP3]], i32 6 -; CHECK-NEXT: [[V6:%.*]] = insertelement <8 x i16> [[V5]], i16 [[TMP10]], i32 6 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP3]], i32 7 -; CHECK-NEXT: [[V7:%.*]] = insertelement <8 x i16> [[V6]], i16 [[TMP11]], i32 7 -; CHECK-NEXT: ret <8 x i16> [[V7]] +; SSE2-LABEL: @loadext_8i8_to_8i16( +; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 +; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 +; SSE2-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 +; SSE2-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 +; SSE2-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 +; SSE2-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; SSE2-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 +; SSE2-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16> +; SSE2-NEXT: [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0 +; SSE2-NEXT: [[V0:%.*]] = insertelement <8 x i16> undef, i16 [[TMP4]], i32 0 +; SSE2-NEXT: [[TMP5:%.*]] = extractelement <8 x i16> [[TMP3]], i32 1 +; SSE2-NEXT: [[V1:%.*]] = insertelement <8 x i16> [[V0]], i16 [[TMP5]], i32 1 +; SSE2-NEXT: [[TMP6:%.*]] = extractelement <8 x i16> [[TMP3]], i32 2 +; SSE2-NEXT: [[V2:%.*]] = insertelement <8 x i16> [[V1]], i16 [[TMP6]], i32 2 +; SSE2-NEXT: [[TMP7:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3 +; SSE2-NEXT: [[V3:%.*]] = insertelement <8 x i16> [[V2]], i16 [[TMP7]], i32 3 +; SSE2-NEXT: [[TMP8:%.*]] = extractelement <8 x i16> [[TMP3]], i32 4 +; SSE2-NEXT: [[V4:%.*]] = insertelement <8 x i16> [[V3]], i16 [[TMP8]], i32 4 +; SSE2-NEXT: [[TMP9:%.*]] = extractelement <8 x i16> [[TMP3]], i32 5 +; SSE2-NEXT: [[V5:%.*]] = insertelement <8 x i16> [[V4]], i16 [[TMP9]], i32 5 +; SSE2-NEXT: [[TMP10:%.*]] = extractelement <8 x i16> [[TMP3]], 
i32 6 +; SSE2-NEXT: [[V6:%.*]] = insertelement <8 x i16> [[V5]], i16 [[TMP10]], i32 6 +; SSE2-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP3]], i32 7 +; SSE2-NEXT: [[V7:%.*]] = insertelement <8 x i16> [[V6]], i16 [[TMP11]], i32 7 +; SSE2-NEXT: ret <8 x i16> [[V7]] +; +; SLM-LABEL: @loadext_8i8_to_8i16( +; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 +; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 +; SLM-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 +; SLM-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 +; SLM-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 +; SLM-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 +; SLM-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1 +; SLM-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1 +; SLM-NEXT: [[I2:%.*]] = load i8, i8* [[P2]], align 1 +; SLM-NEXT: [[I3:%.*]] = load i8, i8* [[P3]], align 1 +; SLM-NEXT: [[I4:%.*]] = load i8, i8* [[P4]], align 1 +; SLM-NEXT: [[I5:%.*]] = load i8, i8* [[P5]], align 1 +; SLM-NEXT: [[I6:%.*]] = load i8, i8* [[P6]], align 1 +; SLM-NEXT: [[I7:%.*]] = load i8, i8* [[P7]], align 1 +; SLM-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i16 +; SLM-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i16 +; SLM-NEXT: [[X2:%.*]] = sext i8 [[I2]] to i16 +; SLM-NEXT: [[X3:%.*]] = sext i8 [[I3]] to i16 +; SLM-NEXT: [[X4:%.*]] = sext i8 [[I4]] to i16 +; SLM-NEXT: [[X5:%.*]] = sext i8 [[I5]] to i16 +; SLM-NEXT: [[X6:%.*]] = sext i8 [[I6]] to i16 +; SLM-NEXT: [[X7:%.*]] = sext i8 [[I7]] to i16 +; SLM-NEXT: [[V0:%.*]] = insertelement <8 x i16> undef, i16 [[X0]], i32 0 +; SLM-NEXT: [[V1:%.*]] = insertelement <8 x i16> [[V0]], i16 [[X1]], i32 1 +; SLM-NEXT: [[V2:%.*]] = insertelement <8 x i16> [[V1]], i16 [[X2]], i32 2 +; SLM-NEXT: [[V3:%.*]] = insertelement <8 x i16> [[V2]], i16 [[X3]], i32 3 +; SLM-NEXT: [[V4:%.*]] = insertelement <8 x i16> [[V3]], 
i16 [[X4]], i32 4 +; SLM-NEXT: [[V5:%.*]] = insertelement <8 x i16> [[V4]], i16 [[X5]], i32 5 +; SLM-NEXT: [[V6:%.*]] = insertelement <8 x i16> [[V5]], i16 [[X6]], i32 6 +; SLM-NEXT: [[V7:%.*]] = insertelement <8 x i16> [[V6]], i16 [[X7]], i32 7 +; SLM-NEXT: ret <8 x i16> [[V7]] +; +; AVX-LABEL: @loadext_8i8_to_8i16( +; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 +; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 +; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 +; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 +; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 +; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; AVX-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 +; AVX-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16> +; AVX-NEXT: [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0 +; AVX-NEXT: [[V0:%.*]] = insertelement <8 x i16> undef, i16 [[TMP4]], i32 0 +; AVX-NEXT: [[TMP5:%.*]] = extractelement <8 x i16> [[TMP3]], i32 1 +; AVX-NEXT: [[V1:%.*]] = insertelement <8 x i16> [[V0]], i16 [[TMP5]], i32 1 +; AVX-NEXT: [[TMP6:%.*]] = extractelement <8 x i16> [[TMP3]], i32 2 +; AVX-NEXT: [[V2:%.*]] = insertelement <8 x i16> [[V1]], i16 [[TMP6]], i32 2 +; AVX-NEXT: [[TMP7:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3 +; AVX-NEXT: [[V3:%.*]] = insertelement <8 x i16> [[V2]], i16 [[TMP7]], i32 3 +; AVX-NEXT: [[TMP8:%.*]] = extractelement <8 x i16> [[TMP3]], i32 4 +; AVX-NEXT: [[V4:%.*]] = insertelement <8 x i16> [[V3]], i16 [[TMP8]], i32 4 +; AVX-NEXT: [[TMP9:%.*]] = extractelement <8 x i16> [[TMP3]], i32 5 +; AVX-NEXT: [[V5:%.*]] = insertelement <8 x i16> [[V4]], i16 [[TMP9]], i32 5 +; AVX-NEXT: [[TMP10:%.*]] = extractelement <8 x i16> [[TMP3]], i32 6 +; AVX-NEXT: [[V6:%.*]] = 
insertelement <8 x i16> [[V5]], i16 [[TMP10]], i32 6 +; AVX-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP3]], i32 7 +; AVX-NEXT: [[V7:%.*]] = insertelement <8 x i16> [[V6]], i16 [[TMP11]], i32 7 +; AVX-NEXT: ret <8 x i16> [[V7]] ; %p1 = getelementptr inbounds i8, i8* %p0, i64 1 %p2 = getelementptr inbounds i8, i8* %p0, i64 2 @@ -296,34 +314,97 @@ define <8 x i16> @loadext_8i8_to_8i16(i8* %p0) { } define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) { -; CHECK-LABEL: @loadext_8i8_to_8i32( -; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0 -; CHECK-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1 -; CHECK-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2 -; CHECK-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3 -; CHECK-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4 -; CHECK-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4 -; CHECK-NEXT: [[TMP9:%.*]] = 
extractelement <8 x i32> [[TMP3]], i32 5 -; CHECK-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6 -; CHECK-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7 -; CHECK-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7 -; CHECK-NEXT: ret <8 x i32> [[V7]] +; SSE2-LABEL: @loadext_8i8_to_8i32( +; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 +; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 +; SSE2-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 +; SSE2-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 +; SSE2-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 +; SSE2-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; SSE2-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 +; SSE2-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32> +; SSE2-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0 +; SSE2-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0 +; SSE2-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1 +; SSE2-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1 +; SSE2-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2 +; SSE2-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2 +; SSE2-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3 +; SSE2-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3 +; SSE2-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4 +; SSE2-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4 +; SSE2-NEXT: [[TMP9:%.*]] = extractelement <8 x 
i32> [[TMP3]], i32 5 +; SSE2-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5 +; SSE2-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6 +; SSE2-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6 +; SSE2-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7 +; SSE2-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7 +; SSE2-NEXT: ret <8 x i32> [[V7]] +; +; SLM-LABEL: @loadext_8i8_to_8i32( +; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 +; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 +; SLM-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 +; SLM-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 +; SLM-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 +; SLM-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 +; SLM-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1 +; SLM-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1 +; SLM-NEXT: [[I2:%.*]] = load i8, i8* [[P2]], align 1 +; SLM-NEXT: [[I3:%.*]] = load i8, i8* [[P3]], align 1 +; SLM-NEXT: [[I4:%.*]] = load i8, i8* [[P4]], align 1 +; SLM-NEXT: [[I5:%.*]] = load i8, i8* [[P5]], align 1 +; SLM-NEXT: [[I6:%.*]] = load i8, i8* [[P6]], align 1 +; SLM-NEXT: [[I7:%.*]] = load i8, i8* [[P7]], align 1 +; SLM-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i32 +; SLM-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i32 +; SLM-NEXT: [[X2:%.*]] = sext i8 [[I2]] to i32 +; SLM-NEXT: [[X3:%.*]] = sext i8 [[I3]] to i32 +; SLM-NEXT: [[X4:%.*]] = sext i8 [[I4]] to i32 +; SLM-NEXT: [[X5:%.*]] = sext i8 [[I5]] to i32 +; SLM-NEXT: [[X6:%.*]] = sext i8 [[I6]] to i32 +; SLM-NEXT: [[X7:%.*]] = sext i8 [[I7]] to i32 +; SLM-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[X0]], i32 0 +; SLM-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[X1]], i32 1 +; SLM-NEXT: [[V2:%.*]] = insertelement <8 x 
i32> [[V1]], i32 [[X2]], i32 2 +; SLM-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[X3]], i32 3 +; SLM-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[X4]], i32 4 +; SLM-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[X5]], i32 5 +; SLM-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[X6]], i32 6 +; SLM-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[X7]], i32 7 +; SLM-NEXT: ret <8 x i32> [[V7]] +; +; AVX-LABEL: @loadext_8i8_to_8i32( +; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 +; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 +; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 +; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 +; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 +; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; AVX-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 +; AVX-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i32> +; AVX-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0 +; AVX-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0 +; AVX-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1 +; AVX-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1 +; AVX-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2 +; AVX-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2 +; AVX-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3 +; AVX-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3 +; AVX-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4 +; AVX-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4 +; AVX-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5 +; AVX-NEXT: 
[[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5 +; AVX-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6 +; AVX-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6 +; AVX-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7 +; AVX-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7 +; AVX-NEXT: ret <8 x i32> [[V7]] ; %p1 = getelementptr inbounds i8, i8* %p0, i64 1 %p2 = getelementptr inbounds i8, i8* %p0, i64 2 @@ -360,58 +441,177 @@ define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) { } define <16 x i16> @loadext_16i8_to_16i16(i8* %p0) { -; CHECK-LABEL: @loadext_16i8_to_16i16( -; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; CHECK-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8 -; CHECK-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9 -; CHECK-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10 -; CHECK-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11 -; CHECK-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12 -; CHECK-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13 -; CHECK-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14 -; CHECK-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>* -; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[TMP2]] to <16 x i16> -; CHECK-NEXT: 
[[TMP4:%.*]] = extractelement <16 x i16> [[TMP3]], i32 0 -; CHECK-NEXT: [[V0:%.*]] = insertelement <16 x i16> undef, i16 [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x i16> [[TMP3]], i32 1 -; CHECK-NEXT: [[V1:%.*]] = insertelement <16 x i16> [[V0]], i16 [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x i16> [[TMP3]], i32 2 -; CHECK-NEXT: [[V2:%.*]] = insertelement <16 x i16> [[V1]], i16 [[TMP6]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <16 x i16> [[TMP3]], i32 3 -; CHECK-NEXT: [[V3:%.*]] = insertelement <16 x i16> [[V2]], i16 [[TMP7]], i32 3 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x i16> [[TMP3]], i32 4 -; CHECK-NEXT: [[V4:%.*]] = insertelement <16 x i16> [[V3]], i16 [[TMP8]], i32 4 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i16> [[TMP3]], i32 5 -; CHECK-NEXT: [[V5:%.*]] = insertelement <16 x i16> [[V4]], i16 [[TMP9]], i32 5 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i16> [[TMP3]], i32 6 -; CHECK-NEXT: [[V6:%.*]] = insertelement <16 x i16> [[V5]], i16 [[TMP10]], i32 6 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x i16> [[TMP3]], i32 7 -; CHECK-NEXT: [[V7:%.*]] = insertelement <16 x i16> [[V6]], i16 [[TMP11]], i32 7 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i16> [[TMP3]], i32 8 -; CHECK-NEXT: [[V8:%.*]] = insertelement <16 x i16> [[V7]], i16 [[TMP12]], i32 8 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i16> [[TMP3]], i32 9 -; CHECK-NEXT: [[V9:%.*]] = insertelement <16 x i16> [[V8]], i16 [[TMP13]], i32 9 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i16> [[TMP3]], i32 10 -; CHECK-NEXT: [[V10:%.*]] = insertelement <16 x i16> [[V9]], i16 [[TMP14]], i32 10 -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i16> [[TMP3]], i32 11 -; CHECK-NEXT: [[V11:%.*]] = insertelement <16 x i16> [[V10]], i16 [[TMP15]], i32 11 -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i16> [[TMP3]], i32 12 -; CHECK-NEXT: [[V12:%.*]] = insertelement <16 x i16> [[V11]], i16 [[TMP16]], i32 12 
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i16> [[TMP3]], i32 13 -; CHECK-NEXT: [[V13:%.*]] = insertelement <16 x i16> [[V12]], i16 [[TMP17]], i32 13 -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i16> [[TMP3]], i32 14 -; CHECK-NEXT: [[V14:%.*]] = insertelement <16 x i16> [[V13]], i16 [[TMP18]], i32 14 -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i16> [[TMP3]], i32 15 -; CHECK-NEXT: [[V15:%.*]] = insertelement <16 x i16> [[V14]], i16 [[TMP19]], i32 15 -; CHECK-NEXT: ret <16 x i16> [[V15]] +; SSE2-LABEL: @loadext_16i8_to_16i16( +; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 +; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 +; SSE2-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 +; SSE2-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 +; SSE2-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 +; SSE2-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 +; SSE2-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8 +; SSE2-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9 +; SSE2-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10 +; SSE2-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11 +; SSE2-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12 +; SSE2-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13 +; SSE2-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14 +; SSE2-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15 +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>* +; SSE2-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1 +; SSE2-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[TMP2]] to <16 x i16> +; SSE2-NEXT: [[TMP4:%.*]] = extractelement <16 x i16> [[TMP3]], i32 0 +; SSE2-NEXT: [[V0:%.*]] = insertelement <16 x i16> undef, i16 [[TMP4]], 
i32 0 +; SSE2-NEXT: [[TMP5:%.*]] = extractelement <16 x i16> [[TMP3]], i32 1 +; SSE2-NEXT: [[V1:%.*]] = insertelement <16 x i16> [[V0]], i16 [[TMP5]], i32 1 +; SSE2-NEXT: [[TMP6:%.*]] = extractelement <16 x i16> [[TMP3]], i32 2 +; SSE2-NEXT: [[V2:%.*]] = insertelement <16 x i16> [[V1]], i16 [[TMP6]], i32 2 +; SSE2-NEXT: [[TMP7:%.*]] = extractelement <16 x i16> [[TMP3]], i32 3 +; SSE2-NEXT: [[V3:%.*]] = insertelement <16 x i16> [[V2]], i16 [[TMP7]], i32 3 +; SSE2-NEXT: [[TMP8:%.*]] = extractelement <16 x i16> [[TMP3]], i32 4 +; SSE2-NEXT: [[V4:%.*]] = insertelement <16 x i16> [[V3]], i16 [[TMP8]], i32 4 +; SSE2-NEXT: [[TMP9:%.*]] = extractelement <16 x i16> [[TMP3]], i32 5 +; SSE2-NEXT: [[V5:%.*]] = insertelement <16 x i16> [[V4]], i16 [[TMP9]], i32 5 +; SSE2-NEXT: [[TMP10:%.*]] = extractelement <16 x i16> [[TMP3]], i32 6 +; SSE2-NEXT: [[V6:%.*]] = insertelement <16 x i16> [[V5]], i16 [[TMP10]], i32 6 +; SSE2-NEXT: [[TMP11:%.*]] = extractelement <16 x i16> [[TMP3]], i32 7 +; SSE2-NEXT: [[V7:%.*]] = insertelement <16 x i16> [[V6]], i16 [[TMP11]], i32 7 +; SSE2-NEXT: [[TMP12:%.*]] = extractelement <16 x i16> [[TMP3]], i32 8 +; SSE2-NEXT: [[V8:%.*]] = insertelement <16 x i16> [[V7]], i16 [[TMP12]], i32 8 +; SSE2-NEXT: [[TMP13:%.*]] = extractelement <16 x i16> [[TMP3]], i32 9 +; SSE2-NEXT: [[V9:%.*]] = insertelement <16 x i16> [[V8]], i16 [[TMP13]], i32 9 +; SSE2-NEXT: [[TMP14:%.*]] = extractelement <16 x i16> [[TMP3]], i32 10 +; SSE2-NEXT: [[V10:%.*]] = insertelement <16 x i16> [[V9]], i16 [[TMP14]], i32 10 +; SSE2-NEXT: [[TMP15:%.*]] = extractelement <16 x i16> [[TMP3]], i32 11 +; SSE2-NEXT: [[V11:%.*]] = insertelement <16 x i16> [[V10]], i16 [[TMP15]], i32 11 +; SSE2-NEXT: [[TMP16:%.*]] = extractelement <16 x i16> [[TMP3]], i32 12 +; SSE2-NEXT: [[V12:%.*]] = insertelement <16 x i16> [[V11]], i16 [[TMP16]], i32 12 +; SSE2-NEXT: [[TMP17:%.*]] = extractelement <16 x i16> [[TMP3]], i32 13 +; SSE2-NEXT: [[V13:%.*]] = insertelement <16 x i16> [[V12]], i16 [[TMP17]], i32 13 
+; SSE2-NEXT: [[TMP18:%.*]] = extractelement <16 x i16> [[TMP3]], i32 14 +; SSE2-NEXT: [[V14:%.*]] = insertelement <16 x i16> [[V13]], i16 [[TMP18]], i32 14 +; SSE2-NEXT: [[TMP19:%.*]] = extractelement <16 x i16> [[TMP3]], i32 15 +; SSE2-NEXT: [[V15:%.*]] = insertelement <16 x i16> [[V14]], i16 [[TMP19]], i32 15 +; SSE2-NEXT: ret <16 x i16> [[V15]] +; +; SLM-LABEL: @loadext_16i8_to_16i16( +; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 +; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 +; SLM-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 +; SLM-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 +; SLM-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 +; SLM-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 +; SLM-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8 +; SLM-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9 +; SLM-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10 +; SLM-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11 +; SLM-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12 +; SLM-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13 +; SLM-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14 +; SLM-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15 +; SLM-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1 +; SLM-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1 +; SLM-NEXT: [[I2:%.*]] = load i8, i8* [[P2]], align 1 +; SLM-NEXT: [[I3:%.*]] = load i8, i8* [[P3]], align 1 +; SLM-NEXT: [[I4:%.*]] = load i8, i8* [[P4]], align 1 +; SLM-NEXT: [[I5:%.*]] = load i8, i8* [[P5]], align 1 +; SLM-NEXT: [[I6:%.*]] = load i8, i8* [[P6]], align 1 +; SLM-NEXT: [[I7:%.*]] = load i8, i8* [[P7]], align 1 +; SLM-NEXT: [[I8:%.*]] = load i8, i8* [[P8]], align 1 +; SLM-NEXT: [[I9:%.*]] = load 
i8, i8* [[P9]], align 1 +; SLM-NEXT: [[I10:%.*]] = load i8, i8* [[P10]], align 1 +; SLM-NEXT: [[I11:%.*]] = load i8, i8* [[P11]], align 1 +; SLM-NEXT: [[I12:%.*]] = load i8, i8* [[P12]], align 1 +; SLM-NEXT: [[I13:%.*]] = load i8, i8* [[P13]], align 1 +; SLM-NEXT: [[I14:%.*]] = load i8, i8* [[P14]], align 1 +; SLM-NEXT: [[I15:%.*]] = load i8, i8* [[P15]], align 1 +; SLM-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i16 +; SLM-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i16 +; SLM-NEXT: [[X2:%.*]] = sext i8 [[I2]] to i16 +; SLM-NEXT: [[X3:%.*]] = sext i8 [[I3]] to i16 +; SLM-NEXT: [[X4:%.*]] = sext i8 [[I4]] to i16 +; SLM-NEXT: [[X5:%.*]] = sext i8 [[I5]] to i16 +; SLM-NEXT: [[X6:%.*]] = sext i8 [[I6]] to i16 +; SLM-NEXT: [[X7:%.*]] = sext i8 [[I7]] to i16 +; SLM-NEXT: [[X8:%.*]] = sext i8 [[I8]] to i16 +; SLM-NEXT: [[X9:%.*]] = sext i8 [[I9]] to i16 +; SLM-NEXT: [[X10:%.*]] = sext i8 [[I10]] to i16 +; SLM-NEXT: [[X11:%.*]] = sext i8 [[I11]] to i16 +; SLM-NEXT: [[X12:%.*]] = sext i8 [[I12]] to i16 +; SLM-NEXT: [[X13:%.*]] = sext i8 [[I13]] to i16 +; SLM-NEXT: [[X14:%.*]] = sext i8 [[I14]] to i16 +; SLM-NEXT: [[X15:%.*]] = sext i8 [[I15]] to i16 +; SLM-NEXT: [[V0:%.*]] = insertelement <16 x i16> undef, i16 [[X0]], i32 0 +; SLM-NEXT: [[V1:%.*]] = insertelement <16 x i16> [[V0]], i16 [[X1]], i32 1 +; SLM-NEXT: [[V2:%.*]] = insertelement <16 x i16> [[V1]], i16 [[X2]], i32 2 +; SLM-NEXT: [[V3:%.*]] = insertelement <16 x i16> [[V2]], i16 [[X3]], i32 3 +; SLM-NEXT: [[V4:%.*]] = insertelement <16 x i16> [[V3]], i16 [[X4]], i32 4 +; SLM-NEXT: [[V5:%.*]] = insertelement <16 x i16> [[V4]], i16 [[X5]], i32 5 +; SLM-NEXT: [[V6:%.*]] = insertelement <16 x i16> [[V5]], i16 [[X6]], i32 6 +; SLM-NEXT: [[V7:%.*]] = insertelement <16 x i16> [[V6]], i16 [[X7]], i32 7 +; SLM-NEXT: [[V8:%.*]] = insertelement <16 x i16> [[V7]], i16 [[X8]], i32 8 +; SLM-NEXT: [[V9:%.*]] = insertelement <16 x i16> [[V8]], i16 [[X9]], i32 9 +; SLM-NEXT: [[V10:%.*]] = insertelement <16 x i16> [[V9]], i16 [[X10]], i32 10 +; 
SLM-NEXT: [[V11:%.*]] = insertelement <16 x i16> [[V10]], i16 [[X11]], i32 11 +; SLM-NEXT: [[V12:%.*]] = insertelement <16 x i16> [[V11]], i16 [[X12]], i32 12 +; SLM-NEXT: [[V13:%.*]] = insertelement <16 x i16> [[V12]], i16 [[X13]], i32 13 +; SLM-NEXT: [[V14:%.*]] = insertelement <16 x i16> [[V13]], i16 [[X14]], i32 14 +; SLM-NEXT: [[V15:%.*]] = insertelement <16 x i16> [[V14]], i16 [[X15]], i32 15 +; SLM-NEXT: ret <16 x i16> [[V15]] +; +; AVX-LABEL: @loadext_16i8_to_16i16( +; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 +; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 +; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 +; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 +; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 +; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 +; AVX-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8 +; AVX-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9 +; AVX-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10 +; AVX-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11 +; AVX-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12 +; AVX-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13 +; AVX-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14 +; AVX-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15 +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>* +; AVX-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1 +; AVX-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[TMP2]] to <16 x i16> +; AVX-NEXT: [[TMP4:%.*]] = extractelement <16 x i16> [[TMP3]], i32 0 +; AVX-NEXT: [[V0:%.*]] = insertelement <16 x i16> undef, i16 [[TMP4]], i32 0 +; AVX-NEXT: [[TMP5:%.*]] = extractelement <16 x i16> [[TMP3]], i32 1 +; AVX-NEXT: 
[[V1:%.*]] = insertelement <16 x i16> [[V0]], i16 [[TMP5]], i32 1 +; AVX-NEXT: [[TMP6:%.*]] = extractelement <16 x i16> [[TMP3]], i32 2 +; AVX-NEXT: [[V2:%.*]] = insertelement <16 x i16> [[V1]], i16 [[TMP6]], i32 2 +; AVX-NEXT: [[TMP7:%.*]] = extractelement <16 x i16> [[TMP3]], i32 3 +; AVX-NEXT: [[V3:%.*]] = insertelement <16 x i16> [[V2]], i16 [[TMP7]], i32 3 +; AVX-NEXT: [[TMP8:%.*]] = extractelement <16 x i16> [[TMP3]], i32 4 +; AVX-NEXT: [[V4:%.*]] = insertelement <16 x i16> [[V3]], i16 [[TMP8]], i32 4 +; AVX-NEXT: [[TMP9:%.*]] = extractelement <16 x i16> [[TMP3]], i32 5 +; AVX-NEXT: [[V5:%.*]] = insertelement <16 x i16> [[V4]], i16 [[TMP9]], i32 5 +; AVX-NEXT: [[TMP10:%.*]] = extractelement <16 x i16> [[TMP3]], i32 6 +; AVX-NEXT: [[V6:%.*]] = insertelement <16 x i16> [[V5]], i16 [[TMP10]], i32 6 +; AVX-NEXT: [[TMP11:%.*]] = extractelement <16 x i16> [[TMP3]], i32 7 +; AVX-NEXT: [[V7:%.*]] = insertelement <16 x i16> [[V6]], i16 [[TMP11]], i32 7 +; AVX-NEXT: [[TMP12:%.*]] = extractelement <16 x i16> [[TMP3]], i32 8 +; AVX-NEXT: [[V8:%.*]] = insertelement <16 x i16> [[V7]], i16 [[TMP12]], i32 8 +; AVX-NEXT: [[TMP13:%.*]] = extractelement <16 x i16> [[TMP3]], i32 9 +; AVX-NEXT: [[V9:%.*]] = insertelement <16 x i16> [[V8]], i16 [[TMP13]], i32 9 +; AVX-NEXT: [[TMP14:%.*]] = extractelement <16 x i16> [[TMP3]], i32 10 +; AVX-NEXT: [[V10:%.*]] = insertelement <16 x i16> [[V9]], i16 [[TMP14]], i32 10 +; AVX-NEXT: [[TMP15:%.*]] = extractelement <16 x i16> [[TMP3]], i32 11 +; AVX-NEXT: [[V11:%.*]] = insertelement <16 x i16> [[V10]], i16 [[TMP15]], i32 11 +; AVX-NEXT: [[TMP16:%.*]] = extractelement <16 x i16> [[TMP3]], i32 12 +; AVX-NEXT: [[V12:%.*]] = insertelement <16 x i16> [[V11]], i16 [[TMP16]], i32 12 +; AVX-NEXT: [[TMP17:%.*]] = extractelement <16 x i16> [[TMP3]], i32 13 +; AVX-NEXT: [[V13:%.*]] = insertelement <16 x i16> [[V12]], i16 [[TMP17]], i32 13 +; AVX-NEXT: [[TMP18:%.*]] = extractelement <16 x i16> [[TMP3]], i32 14 +; AVX-NEXT: [[V14:%.*]] = insertelement 
<16 x i16> [[V13]], i16 [[TMP18]], i32 14 +; AVX-NEXT: [[TMP19:%.*]] = extractelement <16 x i16> [[TMP3]], i32 15 +; AVX-NEXT: [[V15:%.*]] = insertelement <16 x i16> [[V14]], i16 [[TMP19]], i32 15 +; AVX-NEXT: ret <16 x i16> [[V15]] ; %p1 = getelementptr inbounds i8, i8* %p0, i64 1 %p2 = getelementptr inbounds i8, i8* %p0, i64 2 @@ -484,26 +684,15 @@ define <16 x i16> @loadext_16i8_to_16i16(i8* %p0) { ; define <2 x i64> @loadext_2i16_to_2i64(i16* %p0) { -; SSE2-LABEL: @loadext_2i16_to_2i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[I0:%.*]] = load i16, i16* [[P0]], align 1 -; SSE2-NEXT: [[I1:%.*]] = load i16, i16* [[P1]], align 1 -; SSE2-NEXT: [[X0:%.*]] = sext i16 [[I0]] to i64 -; SSE2-NEXT: [[X1:%.*]] = sext i16 [[I1]] to i64 -; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0 -; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1 -; SSE2-NEXT: ret <2 x i64> [[V1]] -; -; SLM-LABEL: @loadext_2i16_to_2i64( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>* -; SLM-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1 -; SLM-NEXT: [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64> -; SLM-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 -; SLM-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0 -; SLM-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 -; SLM-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1 -; SLM-NEXT: ret <2 x i64> [[V1]] +; SSE-LABEL: @loadext_2i16_to_2i64( +; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 +; SSE-NEXT: [[I0:%.*]] = load i16, i16* [[P0]], align 1 +; SSE-NEXT: [[I1:%.*]] = load i16, i16* [[P1]], align 1 +; SSE-NEXT: [[X0:%.*]] = sext i16 [[I0]] to i64 +; SSE-NEXT: [[X1:%.*]] = sext i16 [[I1]] to i64 +; SSE-NEXT: [[V0:%.*]] = insertelement <2 x i64> 
undef, i64 [[X0]], i32 0 +; SSE-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1 +; SSE-NEXT: ret <2 x i64> [[V1]] ; ; AVX-LABEL: @loadext_2i16_to_2i64( ; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 @@ -527,22 +716,57 @@ define <2 x i64> @loadext_2i16_to_2i64(i16* %p0) { } define <4 x i32> @loadext_4i16_to_4i32(i16* %p0) { -; CHECK-LABEL: @loadext_4i16_to_4i32( -; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0 -; CHECK-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1 -; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2 -; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[TMP6]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 -; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[TMP7]], i32 3 -; CHECK-NEXT: ret <4 x i32> [[V3]] +; SSE2-LABEL: @loadext_4i16_to_4i32( +; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 +; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 +; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* +; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 +; SSE2-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32> +; SSE2-NEXT: [[TMP4:%.*]] = extractelement 
<4 x i32> [[TMP3]], i32 0 +; SSE2-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0 +; SSE2-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1 +; SSE2-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[TMP5]], i32 1 +; SSE2-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2 +; SSE2-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[TMP6]], i32 2 +; SSE2-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 +; SSE2-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[TMP7]], i32 3 +; SSE2-NEXT: ret <4 x i32> [[V3]] +; +; SLM-LABEL: @loadext_4i16_to_4i32( +; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 +; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 +; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 +; SLM-NEXT: [[I0:%.*]] = load i16, i16* [[P0]], align 1 +; SLM-NEXT: [[I1:%.*]] = load i16, i16* [[P1]], align 1 +; SLM-NEXT: [[I2:%.*]] = load i16, i16* [[P2]], align 1 +; SLM-NEXT: [[I3:%.*]] = load i16, i16* [[P3]], align 1 +; SLM-NEXT: [[X0:%.*]] = sext i16 [[I0]] to i32 +; SLM-NEXT: [[X1:%.*]] = sext i16 [[I1]] to i32 +; SLM-NEXT: [[X2:%.*]] = sext i16 [[I2]] to i32 +; SLM-NEXT: [[X3:%.*]] = sext i16 [[I3]] to i32 +; SLM-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[X0]], i32 0 +; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[X1]], i32 1 +; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[X2]], i32 2 +; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[X3]], i32 3 +; SLM-NEXT: ret <4 x i32> [[V3]] +; +; AVX-LABEL: @loadext_4i16_to_4i32( +; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 +; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 +; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 +; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* +; AVX-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], 
align 1 +; AVX-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32> +; AVX-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0 +; AVX-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0 +; AVX-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1 +; AVX-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[TMP5]], i32 1 +; AVX-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2 +; AVX-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[TMP6]], i32 2 +; AVX-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 +; AVX-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[TMP7]], i32 3 +; AVX-NEXT: ret <4 x i32> [[V3]] ; %p1 = getelementptr inbounds i16, i16* %p0, i64 1 %p2 = getelementptr inbounds i16, i16* %p0, i64 2 @@ -563,40 +787,23 @@ define <4 x i32> @loadext_4i16_to_4i32(i16* %p0) { } define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) { -; SSE2-LABEL: @loadext_4i16_to_4i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; SSE2-NEXT: [[I0:%.*]] = load i16, i16* [[P0]], align 1 -; SSE2-NEXT: [[I1:%.*]] = load i16, i16* [[P1]], align 1 -; SSE2-NEXT: [[I2:%.*]] = load i16, i16* [[P2]], align 1 -; SSE2-NEXT: [[I3:%.*]] = load i16, i16* [[P3]], align 1 -; SSE2-NEXT: [[X0:%.*]] = sext i16 [[I0]] to i64 -; SSE2-NEXT: [[X1:%.*]] = sext i16 [[I1]] to i64 -; SSE2-NEXT: [[X2:%.*]] = sext i16 [[I2]] to i64 -; SSE2-NEXT: [[X3:%.*]] = sext i16 [[I3]] to i64 -; SSE2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0 -; SSE2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1 -; SSE2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2 -; SSE2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3 -; SSE2-NEXT: ret <4 x i64> [[V3]] -; -; SLM-LABEL: @loadext_4i16_to_4i64( 
-; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* -; SLM-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 -; SLM-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64> -; SLM-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 -; SLM-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0 -; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1 -; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1 -; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2 -; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2 -; SLM-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3 -; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3 -; SLM-NEXT: ret <4 x i64> [[V3]] +; SSE-LABEL: @loadext_4i16_to_4i64( +; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 +; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 +; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 +; SSE-NEXT: [[I0:%.*]] = load i16, i16* [[P0]], align 1 +; SSE-NEXT: [[I1:%.*]] = load i16, i16* [[P1]], align 1 +; SSE-NEXT: [[I2:%.*]] = load i16, i16* [[P2]], align 1 +; SSE-NEXT: [[I3:%.*]] = load i16, i16* [[P3]], align 1 +; SSE-NEXT: [[X0:%.*]] = sext i16 [[I0]] to i64 +; SSE-NEXT: [[X1:%.*]] = sext i16 [[I1]] to i64 +; SSE-NEXT: [[X2:%.*]] = sext i16 [[I2]] to i64 +; SSE-NEXT: [[X3:%.*]] = sext i16 [[I3]] to i64 +; SSE-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0 +; SSE-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1 +; SSE-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2 +; SSE-NEXT: [[V3:%.*]] = insertelement <4 x 
i64> [[V2]], i64 [[X3]], i32 3 +; SSE-NEXT: ret <4 x i64> [[V3]] ; ; AVX1-LABEL: @loadext_4i16_to_4i64( ; AVX1-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 @@ -670,34 +877,97 @@ define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) { } define <8 x i32> @loadext_8i16_to_8i32(i16* %p0) { -; CHECK-LABEL: @loadext_8i16_to_8i32( -; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4 -; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5 -; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6 -; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0 -; CHECK-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1 -; CHECK-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2 -; CHECK-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3 -; CHECK-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4 -; CHECK-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5 -; CHECK-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5 -; 
CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6 -; CHECK-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7 -; CHECK-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7 -; CHECK-NEXT: ret <8 x i32> [[V7]] +; SSE2-LABEL: @loadext_8i16_to_8i32( +; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 +; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 +; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 +; SSE2-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4 +; SSE2-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5 +; SSE2-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6 +; SSE2-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7 +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; SSE2-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1 +; SSE2-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32> +; SSE2-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0 +; SSE2-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0 +; SSE2-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1 +; SSE2-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1 +; SSE2-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2 +; SSE2-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2 +; SSE2-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3 +; SSE2-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3 +; SSE2-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4 +; SSE2-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4 +; SSE2-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5 +; SSE2-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5 +; 
SSE2-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6 +; SSE2-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6 +; SSE2-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7 +; SSE2-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7 +; SSE2-NEXT: ret <8 x i32> [[V7]] +; +; SLM-LABEL: @loadext_8i16_to_8i32( +; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 +; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 +; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 +; SLM-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4 +; SLM-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5 +; SLM-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6 +; SLM-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7 +; SLM-NEXT: [[I0:%.*]] = load i16, i16* [[P0]], align 1 +; SLM-NEXT: [[I1:%.*]] = load i16, i16* [[P1]], align 1 +; SLM-NEXT: [[I2:%.*]] = load i16, i16* [[P2]], align 1 +; SLM-NEXT: [[I3:%.*]] = load i16, i16* [[P3]], align 1 +; SLM-NEXT: [[I4:%.*]] = load i16, i16* [[P4]], align 1 +; SLM-NEXT: [[I5:%.*]] = load i16, i16* [[P5]], align 1 +; SLM-NEXT: [[I6:%.*]] = load i16, i16* [[P6]], align 1 +; SLM-NEXT: [[I7:%.*]] = load i16, i16* [[P7]], align 1 +; SLM-NEXT: [[X0:%.*]] = sext i16 [[I0]] to i32 +; SLM-NEXT: [[X1:%.*]] = sext i16 [[I1]] to i32 +; SLM-NEXT: [[X2:%.*]] = sext i16 [[I2]] to i32 +; SLM-NEXT: [[X3:%.*]] = sext i16 [[I3]] to i32 +; SLM-NEXT: [[X4:%.*]] = sext i16 [[I4]] to i32 +; SLM-NEXT: [[X5:%.*]] = sext i16 [[I5]] to i32 +; SLM-NEXT: [[X6:%.*]] = sext i16 [[I6]] to i32 +; SLM-NEXT: [[X7:%.*]] = sext i16 [[I7]] to i32 +; SLM-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[X0]], i32 0 +; SLM-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[X1]], i32 1 +; SLM-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[X2]], i32 2 +; SLM-NEXT: [[V3:%.*]] = 
insertelement <8 x i32> [[V2]], i32 [[X3]], i32 3 +; SLM-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[X4]], i32 4 +; SLM-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[X5]], i32 5 +; SLM-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[X6]], i32 6 +; SLM-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[X7]], i32 7 +; SLM-NEXT: ret <8 x i32> [[V7]] +; +; AVX-LABEL: @loadext_8i16_to_8i32( +; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 +; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 +; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 +; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4 +; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5 +; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6 +; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7 +; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; AVX-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1 +; AVX-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i32> +; AVX-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0 +; AVX-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0 +; AVX-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1 +; AVX-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1 +; AVX-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2 +; AVX-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2 +; AVX-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3 +; AVX-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3 +; AVX-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4 +; AVX-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4 +; AVX-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5 +; AVX-NEXT: [[V5:%.*]] = insertelement <8 x i32> 
[[V4]], i32 [[TMP9]], i32 5 +; AVX-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6 +; AVX-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6 +; AVX-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7 +; AVX-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7 +; AVX-NEXT: ret <8 x i32> [[V7]] ; %p1 = getelementptr inbounds i16, i16* %p0, i64 1 %p2 = getelementptr inbounds i16, i16* %p0, i64 2 @@ -738,26 +1008,15 @@ define <8 x i32> @loadext_8i16_to_8i32(i16* %p0) { ; define <2 x i64> @loadext_2i32_to_2i64(i32* %p0) { -; SSE2-LABEL: @loadext_2i32_to_2i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[I0:%.*]] = load i32, i32* [[P0]], align 1 -; SSE2-NEXT: [[I1:%.*]] = load i32, i32* [[P1]], align 1 -; SSE2-NEXT: [[X0:%.*]] = sext i32 [[I0]] to i64 -; SSE2-NEXT: [[X1:%.*]] = sext i32 [[I1]] to i64 -; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0 -; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1 -; SSE2-NEXT: ret <2 x i64> [[V1]] -; -; SLM-LABEL: @loadext_2i32_to_2i64( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>* -; SLM-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1 -; SLM-NEXT: [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64> -; SLM-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 -; SLM-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0 -; SLM-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 -; SLM-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1 -; SLM-NEXT: ret <2 x i64> [[V1]] +; SSE-LABEL: @loadext_2i32_to_2i64( +; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 +; SSE-NEXT: [[I0:%.*]] = load i32, i32* [[P0]], align 1 +; SSE-NEXT: [[I1:%.*]] = load i32, i32* [[P1]], align 1 +; SSE-NEXT: 
[[X0:%.*]] = sext i32 [[I0]] to i64 +; SSE-NEXT: [[X1:%.*]] = sext i32 [[I1]] to i64 +; SSE-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0 +; SSE-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1 +; SSE-NEXT: ret <2 x i64> [[V1]] ; ; AVX-LABEL: @loadext_2i32_to_2i64( ; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 @@ -781,40 +1040,23 @@ define <2 x i64> @loadext_2i32_to_2i64(i32* %p0) { } define <4 x i64> @loadext_4i32_to_4i64(i32* %p0) { -; SSE2-LABEL: @loadext_4i32_to_4i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2 -; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3 -; SSE2-NEXT: [[I0:%.*]] = load i32, i32* [[P0]], align 1 -; SSE2-NEXT: [[I1:%.*]] = load i32, i32* [[P1]], align 1 -; SSE2-NEXT: [[I2:%.*]] = load i32, i32* [[P2]], align 1 -; SSE2-NEXT: [[I3:%.*]] = load i32, i32* [[P3]], align 1 -; SSE2-NEXT: [[X0:%.*]] = sext i32 [[I0]] to i64 -; SSE2-NEXT: [[X1:%.*]] = sext i32 [[I1]] to i64 -; SSE2-NEXT: [[X2:%.*]] = sext i32 [[I2]] to i64 -; SSE2-NEXT: [[X3:%.*]] = sext i32 [[I3]] to i64 -; SSE2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0 -; SSE2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1 -; SSE2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2 -; SSE2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3 -; SSE2-NEXT: ret <4 x i64> [[V3]] -; -; SLM-LABEL: @loadext_4i32_to_4i64( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2 -; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <4 x i32>* -; SLM-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1 -; SLM-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] 
to <4 x i64> -; SLM-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 -; SLM-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0 -; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1 -; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1 -; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2 -; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2 -; SLM-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3 -; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3 -; SLM-NEXT: ret <4 x i64> [[V3]] +; SSE-LABEL: @loadext_4i32_to_4i64( +; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 +; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2 +; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3 +; SSE-NEXT: [[I0:%.*]] = load i32, i32* [[P0]], align 1 +; SSE-NEXT: [[I1:%.*]] = load i32, i32* [[P1]], align 1 +; SSE-NEXT: [[I2:%.*]] = load i32, i32* [[P2]], align 1 +; SSE-NEXT: [[I3:%.*]] = load i32, i32* [[P3]], align 1 +; SSE-NEXT: [[X0:%.*]] = sext i32 [[I0]] to i64 +; SSE-NEXT: [[X1:%.*]] = sext i32 [[I1]] to i64 +; SSE-NEXT: [[X2:%.*]] = sext i32 [[I2]] to i64 +; SSE-NEXT: [[X3:%.*]] = sext i32 [[I3]] to i64 +; SSE-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0 +; SSE-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1 +; SSE-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2 +; SSE-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3 +; SSE-NEXT: ret <4 x i64> [[V3]] ; ; AVX1-LABEL: @loadext_4i32_to_4i64( ; AVX1-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/zext.ll b/llvm/test/Transforms/SLPVectorizer/X86/zext.ll index d82aeb856768f..ead4ffdeb0fcf 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/zext.ll +++ 
b/llvm/test/Transforms/SLPVectorizer/X86/zext.ll @@ -11,26 +11,15 @@ ; define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) { -; SSE2-LABEL: @loadext_2i8_to_2i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1 -; SSE2-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1 -; SSE2-NEXT: [[X0:%.*]] = zext i8 [[I0]] to i64 -; SSE2-NEXT: [[X1:%.*]] = zext i8 [[I1]] to i64 -; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0 -; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1 -; SSE2-NEXT: ret <2 x i64> [[V1]] -; -; SLM-LABEL: @loadext_2i8_to_2i64( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>* -; SLM-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1 -; SLM-NEXT: [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64> -; SLM-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 -; SLM-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0 -; SLM-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 -; SLM-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1 -; SLM-NEXT: ret <2 x i64> [[V1]] +; SSE-LABEL: @loadext_2i8_to_2i64( +; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; SSE-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1 +; SSE-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1 +; SSE-NEXT: [[X0:%.*]] = zext i8 [[I0]] to i64 +; SSE-NEXT: [[X1:%.*]] = zext i8 [[I1]] to i64 +; SSE-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0 +; SSE-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1 +; SSE-NEXT: ret <2 x i64> [[V1]] ; ; AVX-LABEL: @loadext_2i8_to_2i64( ; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 @@ -54,22 +43,57 @@ define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) { } define <4 x i32> @loadext_4i8_to_4i32(i8* %p0) { 
-; CHECK-LABEL: @loadext_4i8_to_4i32( -; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0 -; CHECK-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1 -; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2 -; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[TMP6]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 -; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[TMP7]], i32 3 -; CHECK-NEXT: ret <4 x i32> [[V3]] +; SSE2-LABEL: @loadext_4i8_to_4i32( +; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 +; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* +; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 +; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32> +; SSE2-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0 +; SSE2-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0 +; SSE2-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1 +; SSE2-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[TMP5]], i32 1 +; SSE2-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2 +; SSE2-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[TMP6]], i32 
2 +; SSE2-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 +; SSE2-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[TMP7]], i32 3 +; SSE2-NEXT: ret <4 x i32> [[V3]] +; +; SLM-LABEL: @loadext_4i8_to_4i32( +; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 +; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 +; SLM-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1 +; SLM-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1 +; SLM-NEXT: [[I2:%.*]] = load i8, i8* [[P2]], align 1 +; SLM-NEXT: [[I3:%.*]] = load i8, i8* [[P3]], align 1 +; SLM-NEXT: [[X0:%.*]] = zext i8 [[I0]] to i32 +; SLM-NEXT: [[X1:%.*]] = zext i8 [[I1]] to i32 +; SLM-NEXT: [[X2:%.*]] = zext i8 [[I2]] to i32 +; SLM-NEXT: [[X3:%.*]] = zext i8 [[I3]] to i32 +; SLM-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[X0]], i32 0 +; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[X1]], i32 1 +; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[X2]], i32 2 +; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[X3]], i32 3 +; SLM-NEXT: ret <4 x i32> [[V3]] +; +; AVX-LABEL: @loadext_4i8_to_4i32( +; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 +; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* +; AVX-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 +; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32> +; AVX-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0 +; AVX-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0 +; AVX-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1 +; AVX-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[TMP5]], i32 1 +; AVX-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], 
i32 2 +; AVX-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[TMP6]], i32 2 +; AVX-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 +; AVX-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[TMP7]], i32 3 +; AVX-NEXT: ret <4 x i32> [[V3]] ; %p1 = getelementptr inbounds i8, i8* %p0, i64 1 %p2 = getelementptr inbounds i8, i8* %p0, i64 2 @@ -90,40 +114,23 @@ define <4 x i32> @loadext_4i8_to_4i32(i8* %p0) { } define <4 x i64> @loadext_4i8_to_4i64(i8* %p0) { -; SSE2-LABEL: @loadext_4i8_to_4i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SSE2-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1 -; SSE2-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1 -; SSE2-NEXT: [[I2:%.*]] = load i8, i8* [[P2]], align 1 -; SSE2-NEXT: [[I3:%.*]] = load i8, i8* [[P3]], align 1 -; SSE2-NEXT: [[X0:%.*]] = zext i8 [[I0]] to i64 -; SSE2-NEXT: [[X1:%.*]] = zext i8 [[I1]] to i64 -; SSE2-NEXT: [[X2:%.*]] = zext i8 [[I2]] to i64 -; SSE2-NEXT: [[X3:%.*]] = zext i8 [[I3]] to i64 -; SSE2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0 -; SSE2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1 -; SSE2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2 -; SSE2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3 -; SSE2-NEXT: ret <4 x i64> [[V3]] -; -; SLM-LABEL: @loadext_4i8_to_4i64( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <4 x i8>* -; SLM-NEXT: [[TMP2:%.*]] = load <4 x i8>, <4 x i8>* [[TMP1]], align 1 -; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64> -; SLM-NEXT: [[TMP4:%.*]] = extractelement <4 
x i64> [[TMP3]], i32 0 -; SLM-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0 -; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1 -; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1 -; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2 -; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2 -; SLM-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3 -; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3 -; SLM-NEXT: ret <4 x i64> [[V3]] +; SSE-LABEL: @loadext_4i8_to_4i64( +; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 +; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 +; SSE-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1 +; SSE-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1 +; SSE-NEXT: [[I2:%.*]] = load i8, i8* [[P2]], align 1 +; SSE-NEXT: [[I3:%.*]] = load i8, i8* [[P3]], align 1 +; SSE-NEXT: [[X0:%.*]] = zext i8 [[I0]] to i64 +; SSE-NEXT: [[X1:%.*]] = zext i8 [[I1]] to i64 +; SSE-NEXT: [[X2:%.*]] = zext i8 [[I2]] to i64 +; SSE-NEXT: [[X3:%.*]] = zext i8 [[I3]] to i64 +; SSE-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0 +; SSE-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1 +; SSE-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2 +; SSE-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3 +; SSE-NEXT: ret <4 x i64> [[V3]] ; ; AVX1-LABEL: @loadext_4i8_to_4i64( ; AVX1-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 @@ -197,34 +204,97 @@ define <4 x i64> @loadext_4i8_to_4i64(i8* %p0) { } define <8 x i16> @loadext_8i8_to_8i16(i8* %p0) { -; CHECK-LABEL: @loadext_8i8_to_8i16( -; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 
-; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0 -; CHECK-NEXT: [[V0:%.*]] = insertelement <8 x i16> undef, i16 [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i16> [[TMP3]], i32 1 -; CHECK-NEXT: [[V1:%.*]] = insertelement <8 x i16> [[V0]], i16 [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i16> [[TMP3]], i32 2 -; CHECK-NEXT: [[V2:%.*]] = insertelement <8 x i16> [[V1]], i16 [[TMP6]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3 -; CHECK-NEXT: [[V3:%.*]] = insertelement <8 x i16> [[V2]], i16 [[TMP7]], i32 3 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i16> [[TMP3]], i32 4 -; CHECK-NEXT: [[V4:%.*]] = insertelement <8 x i16> [[V3]], i16 [[TMP8]], i32 4 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i16> [[TMP3]], i32 5 -; CHECK-NEXT: [[V5:%.*]] = insertelement <8 x i16> [[V4]], i16 [[TMP9]], i32 5 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i16> [[TMP3]], i32 6 -; CHECK-NEXT: [[V6:%.*]] = insertelement <8 x i16> [[V5]], i16 [[TMP10]], i32 6 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP3]], i32 7 -; CHECK-NEXT: [[V7:%.*]] = insertelement <8 x i16> [[V6]], i16 [[TMP11]], i32 7 -; CHECK-NEXT: ret <8 x i16> [[V7]] +; SSE2-LABEL: @loadext_8i8_to_8i16( +; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 
+; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 +; SSE2-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 +; SSE2-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 +; SSE2-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 +; SSE2-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; SSE2-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 +; SSE2-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16> +; SSE2-NEXT: [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0 +; SSE2-NEXT: [[V0:%.*]] = insertelement <8 x i16> undef, i16 [[TMP4]], i32 0 +; SSE2-NEXT: [[TMP5:%.*]] = extractelement <8 x i16> [[TMP3]], i32 1 +; SSE2-NEXT: [[V1:%.*]] = insertelement <8 x i16> [[V0]], i16 [[TMP5]], i32 1 +; SSE2-NEXT: [[TMP6:%.*]] = extractelement <8 x i16> [[TMP3]], i32 2 +; SSE2-NEXT: [[V2:%.*]] = insertelement <8 x i16> [[V1]], i16 [[TMP6]], i32 2 +; SSE2-NEXT: [[TMP7:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3 +; SSE2-NEXT: [[V3:%.*]] = insertelement <8 x i16> [[V2]], i16 [[TMP7]], i32 3 +; SSE2-NEXT: [[TMP8:%.*]] = extractelement <8 x i16> [[TMP3]], i32 4 +; SSE2-NEXT: [[V4:%.*]] = insertelement <8 x i16> [[V3]], i16 [[TMP8]], i32 4 +; SSE2-NEXT: [[TMP9:%.*]] = extractelement <8 x i16> [[TMP3]], i32 5 +; SSE2-NEXT: [[V5:%.*]] = insertelement <8 x i16> [[V4]], i16 [[TMP9]], i32 5 +; SSE2-NEXT: [[TMP10:%.*]] = extractelement <8 x i16> [[TMP3]], i32 6 +; SSE2-NEXT: [[V6:%.*]] = insertelement <8 x i16> [[V5]], i16 [[TMP10]], i32 6 +; SSE2-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP3]], i32 7 +; SSE2-NEXT: [[V7:%.*]] = insertelement <8 x i16> [[V6]], i16 [[TMP11]], i32 7 +; SSE2-NEXT: ret <8 x i16> [[V7]] +; +; SLM-LABEL: @loadext_8i8_to_8i16( +; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 +; SLM-NEXT: [[P3:%.*]] = 
getelementptr inbounds i8, i8* [[P0]], i64 3 +; SLM-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 +; SLM-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 +; SLM-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 +; SLM-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 +; SLM-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1 +; SLM-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1 +; SLM-NEXT: [[I2:%.*]] = load i8, i8* [[P2]], align 1 +; SLM-NEXT: [[I3:%.*]] = load i8, i8* [[P3]], align 1 +; SLM-NEXT: [[I4:%.*]] = load i8, i8* [[P4]], align 1 +; SLM-NEXT: [[I5:%.*]] = load i8, i8* [[P5]], align 1 +; SLM-NEXT: [[I6:%.*]] = load i8, i8* [[P6]], align 1 +; SLM-NEXT: [[I7:%.*]] = load i8, i8* [[P7]], align 1 +; SLM-NEXT: [[X0:%.*]] = zext i8 [[I0]] to i16 +; SLM-NEXT: [[X1:%.*]] = zext i8 [[I1]] to i16 +; SLM-NEXT: [[X2:%.*]] = zext i8 [[I2]] to i16 +; SLM-NEXT: [[X3:%.*]] = zext i8 [[I3]] to i16 +; SLM-NEXT: [[X4:%.*]] = zext i8 [[I4]] to i16 +; SLM-NEXT: [[X5:%.*]] = zext i8 [[I5]] to i16 +; SLM-NEXT: [[X6:%.*]] = zext i8 [[I6]] to i16 +; SLM-NEXT: [[X7:%.*]] = zext i8 [[I7]] to i16 +; SLM-NEXT: [[V0:%.*]] = insertelement <8 x i16> undef, i16 [[X0]], i32 0 +; SLM-NEXT: [[V1:%.*]] = insertelement <8 x i16> [[V0]], i16 [[X1]], i32 1 +; SLM-NEXT: [[V2:%.*]] = insertelement <8 x i16> [[V1]], i16 [[X2]], i32 2 +; SLM-NEXT: [[V3:%.*]] = insertelement <8 x i16> [[V2]], i16 [[X3]], i32 3 +; SLM-NEXT: [[V4:%.*]] = insertelement <8 x i16> [[V3]], i16 [[X4]], i32 4 +; SLM-NEXT: [[V5:%.*]] = insertelement <8 x i16> [[V4]], i16 [[X5]], i32 5 +; SLM-NEXT: [[V6:%.*]] = insertelement <8 x i16> [[V5]], i16 [[X6]], i32 6 +; SLM-NEXT: [[V7:%.*]] = insertelement <8 x i16> [[V6]], i16 [[X7]], i32 7 +; SLM-NEXT: ret <8 x i16> [[V7]] +; +; AVX-LABEL: @loadext_8i8_to_8i16( +; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 +; AVX-NEXT: 
[[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 +; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 +; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 +; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 +; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; AVX-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 +; AVX-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16> +; AVX-NEXT: [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i32 0 +; AVX-NEXT: [[V0:%.*]] = insertelement <8 x i16> undef, i16 [[TMP4]], i32 0 +; AVX-NEXT: [[TMP5:%.*]] = extractelement <8 x i16> [[TMP3]], i32 1 +; AVX-NEXT: [[V1:%.*]] = insertelement <8 x i16> [[V0]], i16 [[TMP5]], i32 1 +; AVX-NEXT: [[TMP6:%.*]] = extractelement <8 x i16> [[TMP3]], i32 2 +; AVX-NEXT: [[V2:%.*]] = insertelement <8 x i16> [[V1]], i16 [[TMP6]], i32 2 +; AVX-NEXT: [[TMP7:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3 +; AVX-NEXT: [[V3:%.*]] = insertelement <8 x i16> [[V2]], i16 [[TMP7]], i32 3 +; AVX-NEXT: [[TMP8:%.*]] = extractelement <8 x i16> [[TMP3]], i32 4 +; AVX-NEXT: [[V4:%.*]] = insertelement <8 x i16> [[V3]], i16 [[TMP8]], i32 4 +; AVX-NEXT: [[TMP9:%.*]] = extractelement <8 x i16> [[TMP3]], i32 5 +; AVX-NEXT: [[V5:%.*]] = insertelement <8 x i16> [[V4]], i16 [[TMP9]], i32 5 +; AVX-NEXT: [[TMP10:%.*]] = extractelement <8 x i16> [[TMP3]], i32 6 +; AVX-NEXT: [[V6:%.*]] = insertelement <8 x i16> [[V5]], i16 [[TMP10]], i32 6 +; AVX-NEXT: [[TMP11:%.*]] = extractelement <8 x i16> [[TMP3]], i32 7 +; AVX-NEXT: [[V7:%.*]] = insertelement <8 x i16> [[V6]], i16 [[TMP11]], i32 7 +; AVX-NEXT: ret <8 x i16> [[V7]] ; %p1 = getelementptr inbounds i8, i8* %p0, i64 1 %p2 = getelementptr inbounds i8, i8* %p0, i64 2 @@ -261,34 +331,97 @@ define <8 x i16> @loadext_8i8_to_8i16(i8* %p0) { } define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) { -; CHECK-LABEL: 
@loadext_8i8_to_8i32( -; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0 -; CHECK-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1 -; CHECK-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2 -; CHECK-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3 -; CHECK-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4 -; CHECK-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5 -; CHECK-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6 -; CHECK-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7 -; CHECK-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7 -; CHECK-NEXT: ret <8 x i32> [[V7]] +; SSE2-LABEL: 
@loadext_8i8_to_8i32( +; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 +; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 +; SSE2-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 +; SSE2-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 +; SSE2-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 +; SSE2-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; SSE2-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 +; SSE2-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32> +; SSE2-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0 +; SSE2-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0 +; SSE2-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1 +; SSE2-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1 +; SSE2-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2 +; SSE2-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2 +; SSE2-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3 +; SSE2-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3 +; SSE2-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4 +; SSE2-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4 +; SSE2-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5 +; SSE2-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5 +; SSE2-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6 +; SSE2-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6 +; SSE2-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7 +; SSE2-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7 +; SSE2-NEXT: ret <8 x i32> [[V7]] +; +; SLM-LABEL: @loadext_8i8_to_8i32( +; 
SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 +; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 +; SLM-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 +; SLM-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 +; SLM-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 +; SLM-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 +; SLM-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1 +; SLM-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1 +; SLM-NEXT: [[I2:%.*]] = load i8, i8* [[P2]], align 1 +; SLM-NEXT: [[I3:%.*]] = load i8, i8* [[P3]], align 1 +; SLM-NEXT: [[I4:%.*]] = load i8, i8* [[P4]], align 1 +; SLM-NEXT: [[I5:%.*]] = load i8, i8* [[P5]], align 1 +; SLM-NEXT: [[I6:%.*]] = load i8, i8* [[P6]], align 1 +; SLM-NEXT: [[I7:%.*]] = load i8, i8* [[P7]], align 1 +; SLM-NEXT: [[X0:%.*]] = zext i8 [[I0]] to i32 +; SLM-NEXT: [[X1:%.*]] = zext i8 [[I1]] to i32 +; SLM-NEXT: [[X2:%.*]] = zext i8 [[I2]] to i32 +; SLM-NEXT: [[X3:%.*]] = zext i8 [[I3]] to i32 +; SLM-NEXT: [[X4:%.*]] = zext i8 [[I4]] to i32 +; SLM-NEXT: [[X5:%.*]] = zext i8 [[I5]] to i32 +; SLM-NEXT: [[X6:%.*]] = zext i8 [[I6]] to i32 +; SLM-NEXT: [[X7:%.*]] = zext i8 [[I7]] to i32 +; SLM-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[X0]], i32 0 +; SLM-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[X1]], i32 1 +; SLM-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[X2]], i32 2 +; SLM-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[X3]], i32 3 +; SLM-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[X4]], i32 4 +; SLM-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[X5]], i32 5 +; SLM-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[X6]], i32 6 +; SLM-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[X7]], i32 7 +; SLM-NEXT: ret <8 x i32> [[V7]] +; +; AVX-LABEL: 
@loadext_8i8_to_8i32( +; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 +; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 +; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 +; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 +; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 +; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <8 x i8>* +; AVX-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1 +; AVX-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32> +; AVX-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0 +; AVX-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0 +; AVX-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1 +; AVX-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1 +; AVX-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2 +; AVX-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2 +; AVX-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3 +; AVX-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3 +; AVX-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4 +; AVX-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4 +; AVX-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5 +; AVX-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5 +; AVX-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6 +; AVX-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6 +; AVX-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7 +; AVX-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7 +; AVX-NEXT: ret <8 x i32> [[V7]] ; %p1 = getelementptr inbounds i8, i8* %p0, i64 1 %p2 = 
getelementptr inbounds i8, i8* %p0, i64 2 @@ -325,58 +458,177 @@ define <8 x i32> @loadext_8i8_to_8i32(i8* %p0) { } define <16 x i16> @loadext_16i8_to_16i16(i8* %p0) { -; CHECK-LABEL: @loadext_16i8_to_16i16( -; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 -; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 -; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 -; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 -; CHECK-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8 -; CHECK-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9 -; CHECK-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10 -; CHECK-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11 -; CHECK-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12 -; CHECK-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13 -; CHECK-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14 -; CHECK-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>* -; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i16> [[TMP3]], i32 0 -; CHECK-NEXT: [[V0:%.*]] = insertelement <16 x i16> undef, i16 [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x i16> [[TMP3]], i32 1 -; CHECK-NEXT: [[V1:%.*]] = insertelement <16 x i16> [[V0]], i16 [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x i16> [[TMP3]], i32 2 -; CHECK-NEXT: [[V2:%.*]] = insertelement <16 x i16> [[V1]], i16 [[TMP6]], i32 2 -; CHECK-NEXT: 
[[TMP7:%.*]] = extractelement <16 x i16> [[TMP3]], i32 3 -; CHECK-NEXT: [[V3:%.*]] = insertelement <16 x i16> [[V2]], i16 [[TMP7]], i32 3 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x i16> [[TMP3]], i32 4 -; CHECK-NEXT: [[V4:%.*]] = insertelement <16 x i16> [[V3]], i16 [[TMP8]], i32 4 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i16> [[TMP3]], i32 5 -; CHECK-NEXT: [[V5:%.*]] = insertelement <16 x i16> [[V4]], i16 [[TMP9]], i32 5 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i16> [[TMP3]], i32 6 -; CHECK-NEXT: [[V6:%.*]] = insertelement <16 x i16> [[V5]], i16 [[TMP10]], i32 6 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x i16> [[TMP3]], i32 7 -; CHECK-NEXT: [[V7:%.*]] = insertelement <16 x i16> [[V6]], i16 [[TMP11]], i32 7 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i16> [[TMP3]], i32 8 -; CHECK-NEXT: [[V8:%.*]] = insertelement <16 x i16> [[V7]], i16 [[TMP12]], i32 8 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i16> [[TMP3]], i32 9 -; CHECK-NEXT: [[V9:%.*]] = insertelement <16 x i16> [[V8]], i16 [[TMP13]], i32 9 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i16> [[TMP3]], i32 10 -; CHECK-NEXT: [[V10:%.*]] = insertelement <16 x i16> [[V9]], i16 [[TMP14]], i32 10 -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i16> [[TMP3]], i32 11 -; CHECK-NEXT: [[V11:%.*]] = insertelement <16 x i16> [[V10]], i16 [[TMP15]], i32 11 -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i16> [[TMP3]], i32 12 -; CHECK-NEXT: [[V12:%.*]] = insertelement <16 x i16> [[V11]], i16 [[TMP16]], i32 12 -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i16> [[TMP3]], i32 13 -; CHECK-NEXT: [[V13:%.*]] = insertelement <16 x i16> [[V12]], i16 [[TMP17]], i32 13 -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i16> [[TMP3]], i32 14 -; CHECK-NEXT: [[V14:%.*]] = insertelement <16 x i16> [[V13]], i16 [[TMP18]], i32 14 -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i16> [[TMP3]], i32 15 -; CHECK-NEXT: [[V15:%.*]] = insertelement <16 x i16> [[V14]], i16 
[[TMP19]], i32 15 -; CHECK-NEXT: ret <16 x i16> [[V15]] +; SSE2-LABEL: @loadext_16i8_to_16i16( +; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 +; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 +; SSE2-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 +; SSE2-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 +; SSE2-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 +; SSE2-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 +; SSE2-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8 +; SSE2-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9 +; SSE2-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10 +; SSE2-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11 +; SSE2-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12 +; SSE2-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13 +; SSE2-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14 +; SSE2-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15 +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>* +; SSE2-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1 +; SSE2-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16> +; SSE2-NEXT: [[TMP4:%.*]] = extractelement <16 x i16> [[TMP3]], i32 0 +; SSE2-NEXT: [[V0:%.*]] = insertelement <16 x i16> undef, i16 [[TMP4]], i32 0 +; SSE2-NEXT: [[TMP5:%.*]] = extractelement <16 x i16> [[TMP3]], i32 1 +; SSE2-NEXT: [[V1:%.*]] = insertelement <16 x i16> [[V0]], i16 [[TMP5]], i32 1 +; SSE2-NEXT: [[TMP6:%.*]] = extractelement <16 x i16> [[TMP3]], i32 2 +; SSE2-NEXT: [[V2:%.*]] = insertelement <16 x i16> [[V1]], i16 [[TMP6]], i32 2 +; SSE2-NEXT: [[TMP7:%.*]] = extractelement <16 x i16> [[TMP3]], i32 3 +; SSE2-NEXT: [[V3:%.*]] = insertelement <16 x i16> [[V2]], i16 [[TMP7]], i32 3 +; 
SSE2-NEXT: [[TMP8:%.*]] = extractelement <16 x i16> [[TMP3]], i32 4 +; SSE2-NEXT: [[V4:%.*]] = insertelement <16 x i16> [[V3]], i16 [[TMP8]], i32 4 +; SSE2-NEXT: [[TMP9:%.*]] = extractelement <16 x i16> [[TMP3]], i32 5 +; SSE2-NEXT: [[V5:%.*]] = insertelement <16 x i16> [[V4]], i16 [[TMP9]], i32 5 +; SSE2-NEXT: [[TMP10:%.*]] = extractelement <16 x i16> [[TMP3]], i32 6 +; SSE2-NEXT: [[V6:%.*]] = insertelement <16 x i16> [[V5]], i16 [[TMP10]], i32 6 +; SSE2-NEXT: [[TMP11:%.*]] = extractelement <16 x i16> [[TMP3]], i32 7 +; SSE2-NEXT: [[V7:%.*]] = insertelement <16 x i16> [[V6]], i16 [[TMP11]], i32 7 +; SSE2-NEXT: [[TMP12:%.*]] = extractelement <16 x i16> [[TMP3]], i32 8 +; SSE2-NEXT: [[V8:%.*]] = insertelement <16 x i16> [[V7]], i16 [[TMP12]], i32 8 +; SSE2-NEXT: [[TMP13:%.*]] = extractelement <16 x i16> [[TMP3]], i32 9 +; SSE2-NEXT: [[V9:%.*]] = insertelement <16 x i16> [[V8]], i16 [[TMP13]], i32 9 +; SSE2-NEXT: [[TMP14:%.*]] = extractelement <16 x i16> [[TMP3]], i32 10 +; SSE2-NEXT: [[V10:%.*]] = insertelement <16 x i16> [[V9]], i16 [[TMP14]], i32 10 +; SSE2-NEXT: [[TMP15:%.*]] = extractelement <16 x i16> [[TMP3]], i32 11 +; SSE2-NEXT: [[V11:%.*]] = insertelement <16 x i16> [[V10]], i16 [[TMP15]], i32 11 +; SSE2-NEXT: [[TMP16:%.*]] = extractelement <16 x i16> [[TMP3]], i32 12 +; SSE2-NEXT: [[V12:%.*]] = insertelement <16 x i16> [[V11]], i16 [[TMP16]], i32 12 +; SSE2-NEXT: [[TMP17:%.*]] = extractelement <16 x i16> [[TMP3]], i32 13 +; SSE2-NEXT: [[V13:%.*]] = insertelement <16 x i16> [[V12]], i16 [[TMP17]], i32 13 +; SSE2-NEXT: [[TMP18:%.*]] = extractelement <16 x i16> [[TMP3]], i32 14 +; SSE2-NEXT: [[V14:%.*]] = insertelement <16 x i16> [[V13]], i16 [[TMP18]], i32 14 +; SSE2-NEXT: [[TMP19:%.*]] = extractelement <16 x i16> [[TMP3]], i32 15 +; SSE2-NEXT: [[V15:%.*]] = insertelement <16 x i16> [[V14]], i16 [[TMP19]], i32 15 +; SSE2-NEXT: ret <16 x i16> [[V15]] +; +; SLM-LABEL: @loadext_16i8_to_16i16( +; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], 
i64 1 +; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 +; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 +; SLM-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 +; SLM-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 +; SLM-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 +; SLM-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 +; SLM-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8 +; SLM-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9 +; SLM-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10 +; SLM-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11 +; SLM-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12 +; SLM-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13 +; SLM-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14 +; SLM-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15 +; SLM-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1 +; SLM-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1 +; SLM-NEXT: [[I2:%.*]] = load i8, i8* [[P2]], align 1 +; SLM-NEXT: [[I3:%.*]] = load i8, i8* [[P3]], align 1 +; SLM-NEXT: [[I4:%.*]] = load i8, i8* [[P4]], align 1 +; SLM-NEXT: [[I5:%.*]] = load i8, i8* [[P5]], align 1 +; SLM-NEXT: [[I6:%.*]] = load i8, i8* [[P6]], align 1 +; SLM-NEXT: [[I7:%.*]] = load i8, i8* [[P7]], align 1 +; SLM-NEXT: [[I8:%.*]] = load i8, i8* [[P8]], align 1 +; SLM-NEXT: [[I9:%.*]] = load i8, i8* [[P9]], align 1 +; SLM-NEXT: [[I10:%.*]] = load i8, i8* [[P10]], align 1 +; SLM-NEXT: [[I11:%.*]] = load i8, i8* [[P11]], align 1 +; SLM-NEXT: [[I12:%.*]] = load i8, i8* [[P12]], align 1 +; SLM-NEXT: [[I13:%.*]] = load i8, i8* [[P13]], align 1 +; SLM-NEXT: [[I14:%.*]] = load i8, i8* [[P14]], align 1 +; SLM-NEXT: [[I15:%.*]] = load i8, i8* [[P15]], align 1 +; SLM-NEXT: [[X0:%.*]] = zext i8 [[I0]] to i16 +; SLM-NEXT: [[X1:%.*]] = zext i8 [[I1]] to i16 
+; SLM-NEXT: [[X2:%.*]] = zext i8 [[I2]] to i16 +; SLM-NEXT: [[X3:%.*]] = zext i8 [[I3]] to i16 +; SLM-NEXT: [[X4:%.*]] = zext i8 [[I4]] to i16 +; SLM-NEXT: [[X5:%.*]] = zext i8 [[I5]] to i16 +; SLM-NEXT: [[X6:%.*]] = zext i8 [[I6]] to i16 +; SLM-NEXT: [[X7:%.*]] = zext i8 [[I7]] to i16 +; SLM-NEXT: [[X8:%.*]] = zext i8 [[I8]] to i16 +; SLM-NEXT: [[X9:%.*]] = zext i8 [[I9]] to i16 +; SLM-NEXT: [[X10:%.*]] = zext i8 [[I10]] to i16 +; SLM-NEXT: [[X11:%.*]] = zext i8 [[I11]] to i16 +; SLM-NEXT: [[X12:%.*]] = zext i8 [[I12]] to i16 +; SLM-NEXT: [[X13:%.*]] = zext i8 [[I13]] to i16 +; SLM-NEXT: [[X14:%.*]] = zext i8 [[I14]] to i16 +; SLM-NEXT: [[X15:%.*]] = zext i8 [[I15]] to i16 +; SLM-NEXT: [[V0:%.*]] = insertelement <16 x i16> undef, i16 [[X0]], i32 0 +; SLM-NEXT: [[V1:%.*]] = insertelement <16 x i16> [[V0]], i16 [[X1]], i32 1 +; SLM-NEXT: [[V2:%.*]] = insertelement <16 x i16> [[V1]], i16 [[X2]], i32 2 +; SLM-NEXT: [[V3:%.*]] = insertelement <16 x i16> [[V2]], i16 [[X3]], i32 3 +; SLM-NEXT: [[V4:%.*]] = insertelement <16 x i16> [[V3]], i16 [[X4]], i32 4 +; SLM-NEXT: [[V5:%.*]] = insertelement <16 x i16> [[V4]], i16 [[X5]], i32 5 +; SLM-NEXT: [[V6:%.*]] = insertelement <16 x i16> [[V5]], i16 [[X6]], i32 6 +; SLM-NEXT: [[V7:%.*]] = insertelement <16 x i16> [[V6]], i16 [[X7]], i32 7 +; SLM-NEXT: [[V8:%.*]] = insertelement <16 x i16> [[V7]], i16 [[X8]], i32 8 +; SLM-NEXT: [[V9:%.*]] = insertelement <16 x i16> [[V8]], i16 [[X9]], i32 9 +; SLM-NEXT: [[V10:%.*]] = insertelement <16 x i16> [[V9]], i16 [[X10]], i32 10 +; SLM-NEXT: [[V11:%.*]] = insertelement <16 x i16> [[V10]], i16 [[X11]], i32 11 +; SLM-NEXT: [[V12:%.*]] = insertelement <16 x i16> [[V11]], i16 [[X12]], i32 12 +; SLM-NEXT: [[V13:%.*]] = insertelement <16 x i16> [[V12]], i16 [[X13]], i32 13 +; SLM-NEXT: [[V14:%.*]] = insertelement <16 x i16> [[V13]], i16 [[X14]], i32 14 +; SLM-NEXT: [[V15:%.*]] = insertelement <16 x i16> [[V14]], i16 [[X15]], i32 15 +; SLM-NEXT: ret <16 x i16> [[V15]] +; +; AVX-LABEL: 
@loadext_16i8_to_16i16( +; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 +; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 +; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 4 +; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 5 +; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 6 +; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 7 +; AVX-NEXT: [[P8:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 8 +; AVX-NEXT: [[P9:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 9 +; AVX-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 10 +; AVX-NEXT: [[P11:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 11 +; AVX-NEXT: [[P12:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 12 +; AVX-NEXT: [[P13:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13 +; AVX-NEXT: [[P14:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14 +; AVX-NEXT: [[P15:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15 +; AVX-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>* +; AVX-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1 +; AVX-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16> +; AVX-NEXT: [[TMP4:%.*]] = extractelement <16 x i16> [[TMP3]], i32 0 +; AVX-NEXT: [[V0:%.*]] = insertelement <16 x i16> undef, i16 [[TMP4]], i32 0 +; AVX-NEXT: [[TMP5:%.*]] = extractelement <16 x i16> [[TMP3]], i32 1 +; AVX-NEXT: [[V1:%.*]] = insertelement <16 x i16> [[V0]], i16 [[TMP5]], i32 1 +; AVX-NEXT: [[TMP6:%.*]] = extractelement <16 x i16> [[TMP3]], i32 2 +; AVX-NEXT: [[V2:%.*]] = insertelement <16 x i16> [[V1]], i16 [[TMP6]], i32 2 +; AVX-NEXT: [[TMP7:%.*]] = extractelement <16 x i16> [[TMP3]], i32 3 +; AVX-NEXT: [[V3:%.*]] = insertelement <16 x i16> [[V2]], i16 [[TMP7]], i32 3 +; AVX-NEXT: [[TMP8:%.*]] = extractelement <16 x i16> [[TMP3]], i32 4 +; AVX-NEXT: [[V4:%.*]] = 
insertelement <16 x i16> [[V3]], i16 [[TMP8]], i32 4 +; AVX-NEXT: [[TMP9:%.*]] = extractelement <16 x i16> [[TMP3]], i32 5 +; AVX-NEXT: [[V5:%.*]] = insertelement <16 x i16> [[V4]], i16 [[TMP9]], i32 5 +; AVX-NEXT: [[TMP10:%.*]] = extractelement <16 x i16> [[TMP3]], i32 6 +; AVX-NEXT: [[V6:%.*]] = insertelement <16 x i16> [[V5]], i16 [[TMP10]], i32 6 +; AVX-NEXT: [[TMP11:%.*]] = extractelement <16 x i16> [[TMP3]], i32 7 +; AVX-NEXT: [[V7:%.*]] = insertelement <16 x i16> [[V6]], i16 [[TMP11]], i32 7 +; AVX-NEXT: [[TMP12:%.*]] = extractelement <16 x i16> [[TMP3]], i32 8 +; AVX-NEXT: [[V8:%.*]] = insertelement <16 x i16> [[V7]], i16 [[TMP12]], i32 8 +; AVX-NEXT: [[TMP13:%.*]] = extractelement <16 x i16> [[TMP3]], i32 9 +; AVX-NEXT: [[V9:%.*]] = insertelement <16 x i16> [[V8]], i16 [[TMP13]], i32 9 +; AVX-NEXT: [[TMP14:%.*]] = extractelement <16 x i16> [[TMP3]], i32 10 +; AVX-NEXT: [[V10:%.*]] = insertelement <16 x i16> [[V9]], i16 [[TMP14]], i32 10 +; AVX-NEXT: [[TMP15:%.*]] = extractelement <16 x i16> [[TMP3]], i32 11 +; AVX-NEXT: [[V11:%.*]] = insertelement <16 x i16> [[V10]], i16 [[TMP15]], i32 11 +; AVX-NEXT: [[TMP16:%.*]] = extractelement <16 x i16> [[TMP3]], i32 12 +; AVX-NEXT: [[V12:%.*]] = insertelement <16 x i16> [[V11]], i16 [[TMP16]], i32 12 +; AVX-NEXT: [[TMP17:%.*]] = extractelement <16 x i16> [[TMP3]], i32 13 +; AVX-NEXT: [[V13:%.*]] = insertelement <16 x i16> [[V12]], i16 [[TMP17]], i32 13 +; AVX-NEXT: [[TMP18:%.*]] = extractelement <16 x i16> [[TMP3]], i32 14 +; AVX-NEXT: [[V14:%.*]] = insertelement <16 x i16> [[V13]], i16 [[TMP18]], i32 14 +; AVX-NEXT: [[TMP19:%.*]] = extractelement <16 x i16> [[TMP3]], i32 15 +; AVX-NEXT: [[V15:%.*]] = insertelement <16 x i16> [[V14]], i16 [[TMP19]], i32 15 +; AVX-NEXT: ret <16 x i16> [[V15]] ; %p1 = getelementptr inbounds i8, i8* %p0, i64 1 %p2 = getelementptr inbounds i8, i8* %p0, i64 2 @@ -449,26 +701,15 @@ define <16 x i16> @loadext_16i8_to_16i16(i8* %p0) { ; define <2 x i64> @loadext_2i16_to_2i64(i16* %p0) { -; 
SSE2-LABEL: @loadext_2i16_to_2i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[I0:%.*]] = load i16, i16* [[P0]], align 1 -; SSE2-NEXT: [[I1:%.*]] = load i16, i16* [[P1]], align 1 -; SSE2-NEXT: [[X0:%.*]] = zext i16 [[I0]] to i64 -; SSE2-NEXT: [[X1:%.*]] = zext i16 [[I1]] to i64 -; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0 -; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1 -; SSE2-NEXT: ret <2 x i64> [[V1]] -; -; SLM-LABEL: @loadext_2i16_to_2i64( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>* -; SLM-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1 -; SLM-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64> -; SLM-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 -; SLM-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0 -; SLM-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 -; SLM-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1 -; SLM-NEXT: ret <2 x i64> [[V1]] +; SSE-LABEL: @loadext_2i16_to_2i64( +; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 +; SSE-NEXT: [[I0:%.*]] = load i16, i16* [[P0]], align 1 +; SSE-NEXT: [[I1:%.*]] = load i16, i16* [[P1]], align 1 +; SSE-NEXT: [[X0:%.*]] = zext i16 [[I0]] to i64 +; SSE-NEXT: [[X1:%.*]] = zext i16 [[I1]] to i64 +; SSE-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0 +; SSE-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1 +; SSE-NEXT: ret <2 x i64> [[V1]] ; ; AVX-LABEL: @loadext_2i16_to_2i64( ; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 @@ -492,22 +733,57 @@ define <2 x i64> @loadext_2i16_to_2i64(i16* %p0) { } define <4 x i32> @loadext_4i16_to_4i32(i16* %p0) { -; CHECK-LABEL: @loadext_4i16_to_4i32( -; CHECK-NEXT: [[P1:%.*]] = getelementptr 
inbounds i16, i16* [[P0:%.*]], i64 1 -; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0 -; CHECK-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1 -; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2 -; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[TMP6]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 -; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[TMP7]], i32 3 -; CHECK-NEXT: ret <4 x i32> [[V3]] +; SSE2-LABEL: @loadext_4i16_to_4i32( +; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 +; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 +; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* +; SSE2-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 +; SSE2-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> +; SSE2-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0 +; SSE2-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0 +; SSE2-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1 +; SSE2-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[TMP5]], i32 1 +; SSE2-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2 +; SSE2-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[TMP6]], i32 2 +; SSE2-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> 
[[TMP3]], i32 3 +; SSE2-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[TMP7]], i32 3 +; SSE2-NEXT: ret <4 x i32> [[V3]] +; +; SLM-LABEL: @loadext_4i16_to_4i32( +; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 +; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 +; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 +; SLM-NEXT: [[I0:%.*]] = load i16, i16* [[P0]], align 1 +; SLM-NEXT: [[I1:%.*]] = load i16, i16* [[P1]], align 1 +; SLM-NEXT: [[I2:%.*]] = load i16, i16* [[P2]], align 1 +; SLM-NEXT: [[I3:%.*]] = load i16, i16* [[P3]], align 1 +; SLM-NEXT: [[X0:%.*]] = zext i16 [[I0]] to i32 +; SLM-NEXT: [[X1:%.*]] = zext i16 [[I1]] to i32 +; SLM-NEXT: [[X2:%.*]] = zext i16 [[I2]] to i32 +; SLM-NEXT: [[X3:%.*]] = zext i16 [[I3]] to i32 +; SLM-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[X0]], i32 0 +; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[X1]], i32 1 +; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[X2]], i32 2 +; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[X3]], i32 3 +; SLM-NEXT: ret <4 x i32> [[V3]] +; +; AVX-LABEL: @loadext_4i16_to_4i32( +; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 +; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 +; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 +; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* +; AVX-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 +; AVX-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> +; AVX-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0 +; AVX-NEXT: [[V0:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0 +; AVX-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1 +; AVX-NEXT: [[V1:%.*]] = insertelement <4 x i32> [[V0]], i32 [[TMP5]], i32 1 +; AVX-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2 +; AVX-NEXT: 
[[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 [[TMP6]], i32 2 +; AVX-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 +; AVX-NEXT: [[V3:%.*]] = insertelement <4 x i32> [[V2]], i32 [[TMP7]], i32 3 +; AVX-NEXT: ret <4 x i32> [[V3]] ; %p1 = getelementptr inbounds i16, i16* %p0, i64 1 %p2 = getelementptr inbounds i16, i16* %p0, i64 2 @@ -528,40 +804,23 @@ define <4 x i32> @loadext_4i16_to_4i32(i16* %p0) { } define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) { -; SSE2-LABEL: @loadext_4i16_to_4i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; SSE2-NEXT: [[I0:%.*]] = load i16, i16* [[P0]], align 1 -; SSE2-NEXT: [[I1:%.*]] = load i16, i16* [[P1]], align 1 -; SSE2-NEXT: [[I2:%.*]] = load i16, i16* [[P2]], align 1 -; SSE2-NEXT: [[I3:%.*]] = load i16, i16* [[P3]], align 1 -; SSE2-NEXT: [[X0:%.*]] = zext i16 [[I0]] to i64 -; SSE2-NEXT: [[X1:%.*]] = zext i16 [[I1]] to i64 -; SSE2-NEXT: [[X2:%.*]] = zext i16 [[I2]] to i64 -; SSE2-NEXT: [[X3:%.*]] = zext i16 [[I3]] to i64 -; SSE2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0 -; SSE2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1 -; SSE2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2 -; SSE2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3 -; SSE2-NEXT: ret <4 x i64> [[V3]] -; -; SLM-LABEL: @loadext_4i16_to_4i64( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>* -; SLM-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1 -; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64> -; SLM-NEXT: 
[[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 -; SLM-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0 -; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1 -; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1 -; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2 -; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[TMP6]], i32 2 -; SLM-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3 -; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3 -; SLM-NEXT: ret <4 x i64> [[V3]] +; SSE-LABEL: @loadext_4i16_to_4i64( +; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 +; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 +; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 +; SSE-NEXT: [[I0:%.*]] = load i16, i16* [[P0]], align 1 +; SSE-NEXT: [[I1:%.*]] = load i16, i16* [[P1]], align 1 +; SSE-NEXT: [[I2:%.*]] = load i16, i16* [[P2]], align 1 +; SSE-NEXT: [[I3:%.*]] = load i16, i16* [[P3]], align 1 +; SSE-NEXT: [[X0:%.*]] = zext i16 [[I0]] to i64 +; SSE-NEXT: [[X1:%.*]] = zext i16 [[I1]] to i64 +; SSE-NEXT: [[X2:%.*]] = zext i16 [[I2]] to i64 +; SSE-NEXT: [[X3:%.*]] = zext i16 [[I3]] to i64 +; SSE-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0 +; SSE-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1 +; SSE-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2 +; SSE-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3 +; SSE-NEXT: ret <4 x i64> [[V3]] ; ; AVX1-LABEL: @loadext_4i16_to_4i64( ; AVX1-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 @@ -635,34 +894,97 @@ define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) { } define <8 x i32> @loadext_8i16_to_8i32(i16* %p0) { -; CHECK-LABEL: @loadext_8i16_to_8i32( -; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; 
CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4 -; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5 -; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6 -; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0 -; CHECK-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1 -; CHECK-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2 -; CHECK-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3 -; CHECK-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3 -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4 -; CHECK-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5 -; CHECK-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6 -; CHECK-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7 -; CHECK-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7 -; CHECK-NEXT: ret <8 x i32> [[V7]] +; SSE2-LABEL: @loadext_8i16_to_8i32( +; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* 
[[P0:%.*]], i64 1 +; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 +; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 +; SSE2-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4 +; SSE2-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5 +; SSE2-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6 +; SSE2-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7 +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; SSE2-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1 +; SSE2-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32> +; SSE2-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0 +; SSE2-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0 +; SSE2-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1 +; SSE2-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1 +; SSE2-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2 +; SSE2-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2 +; SSE2-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3 +; SSE2-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3 +; SSE2-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4 +; SSE2-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4 +; SSE2-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5 +; SSE2-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5 +; SSE2-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6 +; SSE2-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6 +; SSE2-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7 +; SSE2-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7 +; SSE2-NEXT: ret <8 x i32> [[V7]] +; +; SLM-LABEL: @loadext_8i16_to_8i32( +; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* 
[[P0:%.*]], i64 1 +; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 +; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 +; SLM-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4 +; SLM-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5 +; SLM-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6 +; SLM-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7 +; SLM-NEXT: [[I0:%.*]] = load i16, i16* [[P0]], align 1 +; SLM-NEXT: [[I1:%.*]] = load i16, i16* [[P1]], align 1 +; SLM-NEXT: [[I2:%.*]] = load i16, i16* [[P2]], align 1 +; SLM-NEXT: [[I3:%.*]] = load i16, i16* [[P3]], align 1 +; SLM-NEXT: [[I4:%.*]] = load i16, i16* [[P4]], align 1 +; SLM-NEXT: [[I5:%.*]] = load i16, i16* [[P5]], align 1 +; SLM-NEXT: [[I6:%.*]] = load i16, i16* [[P6]], align 1 +; SLM-NEXT: [[I7:%.*]] = load i16, i16* [[P7]], align 1 +; SLM-NEXT: [[X0:%.*]] = zext i16 [[I0]] to i32 +; SLM-NEXT: [[X1:%.*]] = zext i16 [[I1]] to i32 +; SLM-NEXT: [[X2:%.*]] = zext i16 [[I2]] to i32 +; SLM-NEXT: [[X3:%.*]] = zext i16 [[I3]] to i32 +; SLM-NEXT: [[X4:%.*]] = zext i16 [[I4]] to i32 +; SLM-NEXT: [[X5:%.*]] = zext i16 [[I5]] to i32 +; SLM-NEXT: [[X6:%.*]] = zext i16 [[I6]] to i32 +; SLM-NEXT: [[X7:%.*]] = zext i16 [[I7]] to i32 +; SLM-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[X0]], i32 0 +; SLM-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[X1]], i32 1 +; SLM-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[X2]], i32 2 +; SLM-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[X3]], i32 3 +; SLM-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[X4]], i32 4 +; SLM-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[X5]], i32 5 +; SLM-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[X6]], i32 6 +; SLM-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[X7]], i32 7 +; SLM-NEXT: ret <8 x i32> [[V7]] +; +; AVX-LABEL: @loadext_8i16_to_8i32( +; AVX-NEXT: 
[[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 +; AVX-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 +; AVX-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 +; AVX-NEXT: [[P4:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 4 +; AVX-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5 +; AVX-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6 +; AVX-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7 +; AVX-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; AVX-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 1 +; AVX-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32> +; AVX-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP3]], i32 0 +; AVX-NEXT: [[V0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP4]], i32 0 +; AVX-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP3]], i32 1 +; AVX-NEXT: [[V1:%.*]] = insertelement <8 x i32> [[V0]], i32 [[TMP5]], i32 1 +; AVX-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i32 2 +; AVX-NEXT: [[V2:%.*]] = insertelement <8 x i32> [[V1]], i32 [[TMP6]], i32 2 +; AVX-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP3]], i32 3 +; AVX-NEXT: [[V3:%.*]] = insertelement <8 x i32> [[V2]], i32 [[TMP7]], i32 3 +; AVX-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i32 4 +; AVX-NEXT: [[V4:%.*]] = insertelement <8 x i32> [[V3]], i32 [[TMP8]], i32 4 +; AVX-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i32 5 +; AVX-NEXT: [[V5:%.*]] = insertelement <8 x i32> [[V4]], i32 [[TMP9]], i32 5 +; AVX-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i32 6 +; AVX-NEXT: [[V6:%.*]] = insertelement <8 x i32> [[V5]], i32 [[TMP10]], i32 6 +; AVX-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP3]], i32 7 +; AVX-NEXT: [[V7:%.*]] = insertelement <8 x i32> [[V6]], i32 [[TMP11]], i32 7 +; AVX-NEXT: ret <8 x i32> [[V7]] ; %p1 = getelementptr inbounds i16, i16* %p0, i64 1 %p2 = getelementptr inbounds 
i16, i16* %p0, i64 2 @@ -703,26 +1025,15 @@ define <8 x i32> @loadext_8i16_to_8i32(i16* %p0) { ; define <2 x i64> @loadext_2i32_to_2i64(i32* %p0) { -; SSE2-LABEL: @loadext_2i32_to_2i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[I0:%.*]] = load i32, i32* [[P0]], align 1 -; SSE2-NEXT: [[I1:%.*]] = load i32, i32* [[P1]], align 1 -; SSE2-NEXT: [[X0:%.*]] = zext i32 [[I0]] to i64 -; SSE2-NEXT: [[X1:%.*]] = zext i32 [[I1]] to i64 -; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0 -; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1 -; SSE2-NEXT: ret <2 x i64> [[V1]] -; -; SLM-LABEL: @loadext_2i32_to_2i64( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>* -; SLM-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1 -; SLM-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64> -; SLM-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 -; SLM-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0 -; SLM-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 -; SLM-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1 -; SLM-NEXT: ret <2 x i64> [[V1]] +; SSE-LABEL: @loadext_2i32_to_2i64( +; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 +; SSE-NEXT: [[I0:%.*]] = load i32, i32* [[P0]], align 1 +; SSE-NEXT: [[I1:%.*]] = load i32, i32* [[P1]], align 1 +; SSE-NEXT: [[X0:%.*]] = zext i32 [[I0]] to i64 +; SSE-NEXT: [[X1:%.*]] = zext i32 [[I1]] to i64 +; SSE-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0 +; SSE-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1 +; SSE-NEXT: ret <2 x i64> [[V1]] ; ; AVX-LABEL: @loadext_2i32_to_2i64( ; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 @@ -746,40 +1057,23 @@ define <2 x i64> 
@loadext_2i32_to_2i64(i32* %p0) { } define <4 x i64> @loadext_4i32_to_4i64(i32* %p0) { -; SSE2-LABEL: @loadext_4i32_to_4i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2 -; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3 -; SSE2-NEXT: [[I0:%.*]] = load i32, i32* [[P0]], align 1 -; SSE2-NEXT: [[I1:%.*]] = load i32, i32* [[P1]], align 1 -; SSE2-NEXT: [[I2:%.*]] = load i32, i32* [[P2]], align 1 -; SSE2-NEXT: [[I3:%.*]] = load i32, i32* [[P3]], align 1 -; SSE2-NEXT: [[X0:%.*]] = zext i32 [[I0]] to i64 -; SSE2-NEXT: [[X1:%.*]] = zext i32 [[I1]] to i64 -; SSE2-NEXT: [[X2:%.*]] = zext i32 [[I2]] to i64 -; SSE2-NEXT: [[X3:%.*]] = zext i32 [[I3]] to i64 -; SSE2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0 -; SSE2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1 -; SSE2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2 -; SSE2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3 -; SSE2-NEXT: ret <4 x i64> [[V3]] -; -; SLM-LABEL: @loadext_4i32_to_4i64( -; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2 -; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3 -; SLM-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <4 x i32>* -; SLM-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1 -; SLM-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64> -; SLM-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 -; SLM-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0 -; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1 -; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1 -; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2 -; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i64> 
[[V1]], i64 [[TMP6]], i32 2 -; SLM-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3 -; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[TMP7]], i32 3 -; SLM-NEXT: ret <4 x i64> [[V3]] +; SSE-LABEL: @loadext_4i32_to_4i64( +; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 +; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2 +; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3 +; SSE-NEXT: [[I0:%.*]] = load i32, i32* [[P0]], align 1 +; SSE-NEXT: [[I1:%.*]] = load i32, i32* [[P1]], align 1 +; SSE-NEXT: [[I2:%.*]] = load i32, i32* [[P2]], align 1 +; SSE-NEXT: [[I3:%.*]] = load i32, i32* [[P3]], align 1 +; SSE-NEXT: [[X0:%.*]] = zext i32 [[I0]] to i64 +; SSE-NEXT: [[X1:%.*]] = zext i32 [[I1]] to i64 +; SSE-NEXT: [[X2:%.*]] = zext i32 [[I2]] to i64 +; SSE-NEXT: [[X3:%.*]] = zext i32 [[I3]] to i64 +; SSE-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0 +; SSE-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1 +; SSE-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2 +; SSE-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3 +; SSE-NEXT: ret <4 x i64> [[V3]] ; ; AVX1-LABEL: @loadext_4i32_to_4i64( ; AVX1-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 From 5d21f75b57658db538b5e1764edc775271a651cd Mon Sep 17 00:00:00 2001 From: taewookoh Date: Wed, 27 Nov 2019 11:17:10 -0800 Subject: [PATCH 132/591] Revert b19ec1eb3d0c Summary: This reverts commit b19ec1eb3d0c as it fails powerpc tests Subscribers: llvm-commits --- .../llvm/Analysis/BranchProbabilityInfo.h | 7 +- llvm/lib/Analysis/BranchProbabilityInfo.cpp | 132 +++++++-------- .../Analysis/BranchProbabilityInfo/basic.ll | 18 -- .../BranchProbabilityInfo/noreturn.ll | 26 --- llvm/test/CodeGen/X86/block-placement.ll | 4 +- llvm/test/CodeGen/X86/pr37916.ll | 1 + llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll | 160 +++++++++--------- 7 files 
changed, 143 insertions(+), 205 deletions(-) diff --git a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h index 41d6c23b8d0d9..c8965936fb9c1 100644 --- a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h @@ -34,7 +34,6 @@ namespace llvm { class Function; class LoopInfo; class raw_ostream; -class PostDominatorTree; class TargetLibraryInfo; class Value; @@ -188,10 +187,8 @@ class BranchProbabilityInfo { /// Track the set of blocks that always lead to a cold call. SmallPtrSet PostDominatedByColdCall; - void computePostDominatedByUnreachable(const Function &F, - PostDominatorTree *PDT); - void computePostDominatedByColdCall(const Function &F, - PostDominatorTree *PDT); + void updatePostDominatedByUnreachable(const BasicBlock *BB); + void updatePostDominatedByColdCall(const BasicBlock *BB); bool calcUnreachableHeuristics(const BasicBlock *BB); bool calcMetadataWeights(const BasicBlock *BB); bool calcColdCallHeuristics(const BasicBlock *BB); diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp index ffba65b5ed5ee..7bd237b9ad537 100644 --- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -16,7 +16,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -147,83 +146,69 @@ static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1; /// instruction. This is essentially never taken. 
static const uint32_t IH_NONTAKEN_WEIGHT = 1; -static void UpdatePDTWorklist(const BasicBlock *BB, PostDominatorTree *PDT, - SmallVectorImpl &WorkList, - SmallPtrSetImpl &TargetSet) { - SmallVector Descendants; - SmallPtrSet NewItems; - - PDT->getDescendants(const_cast(BB), Descendants); - for (auto *BB : Descendants) - if (TargetSet.insert(BB).second) - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) - if (!TargetSet.count(*PI)) - NewItems.insert(*PI); - WorkList.insert(WorkList.end(), NewItems.begin(), NewItems.end()); -} - -/// Compute a set of basic blocks that are post-dominated by unreachables. -void BranchProbabilityInfo::computePostDominatedByUnreachable( - const Function &F, PostDominatorTree *PDT) { - SmallVector WorkList; - for (auto &BB : F) { - const Instruction *TI = BB.getTerminator(); - if (TI->getNumSuccessors() == 0) { - if (isa(TI) || - // If this block is terminated by a call to - // @llvm.experimental.deoptimize then treat it like an unreachable - // since the @llvm.experimental.deoptimize call is expected to - // practically never execute. - BB.getTerminatingDeoptimizeCall()) - UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByUnreachable); - } +/// Add \p BB to PostDominatedByUnreachable set if applicable. +void +BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) { + const Instruction *TI = BB->getTerminator(); + if (TI->getNumSuccessors() == 0) { + if (isa(TI) || + // If this block is terminated by a call to + // @llvm.experimental.deoptimize then treat it like an unreachable since + // the @llvm.experimental.deoptimize call is expected to practically + // never execute. 
+ BB->getTerminatingDeoptimizeCall()) + PostDominatedByUnreachable.insert(BB); + return; } - while (!WorkList.empty()) { - const BasicBlock *BB = WorkList.pop_back_val(); - if (PostDominatedByUnreachable.count(BB)) - continue; - // If the terminator is an InvokeInst, check only the normal destination - // block as the unwind edge of InvokeInst is also very unlikely taken. - if (auto *II = dyn_cast(BB->getTerminator())) { - if (PostDominatedByUnreachable.count(II->getNormalDest())) - UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable); - } - // If all the successors are unreachable, BB is unreachable as well. - else if (!successors(BB).empty() && - llvm::all_of(successors(BB), [this](const BasicBlock *Succ) { - return PostDominatedByUnreachable.count(Succ); - })) - UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable); + // If the terminator is an InvokeInst, check only the normal destination block + // as the unwind edge of InvokeInst is also very unlikely taken. + if (auto *II = dyn_cast(TI)) { + if (PostDominatedByUnreachable.count(II->getNormalDest())) + PostDominatedByUnreachable.insert(BB); + return; } + + for (auto *I : successors(BB)) + // If any of successor is not post dominated then BB is also not. + if (!PostDominatedByUnreachable.count(I)) + return; + + PostDominatedByUnreachable.insert(BB); } -/// compute a set of basic blocks that are post-dominated by ColdCalls. -void BranchProbabilityInfo::computePostDominatedByColdCall( - const Function &F, PostDominatorTree *PDT) { - SmallVector WorkList; - for (auto &BB : F) - for (auto &I : BB) - if (const CallInst *CI = dyn_cast(&I)) - if (CI->hasFnAttr(Attribute::Cold)) - UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByColdCall); +/// Add \p BB to PostDominatedByColdCall set if applicable. 
+void +BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) { + assert(!PostDominatedByColdCall.count(BB)); + const Instruction *TI = BB->getTerminator(); + if (TI->getNumSuccessors() == 0) + return; - while (!WorkList.empty()) { - const BasicBlock *BB = WorkList.pop_back_val(); + // If all of successor are post dominated then BB is also done. + if (llvm::all_of(successors(BB), [&](const BasicBlock *SuccBB) { + return PostDominatedByColdCall.count(SuccBB); + })) { + PostDominatedByColdCall.insert(BB); + return; + } - // If the terminator is an InvokeInst, check only the normal destination - // block as the unwind edge of InvokeInst is also very unlikely taken. - if (auto *II = dyn_cast(BB->getTerminator())) { - if (PostDominatedByColdCall.count(II->getNormalDest())) - UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall); + // If the terminator is an InvokeInst, check only the normal destination + // block as the unwind edge of InvokeInst is also very unlikely taken. + if (auto *II = dyn_cast(TI)) + if (PostDominatedByColdCall.count(II->getNormalDest())) { + PostDominatedByColdCall.insert(BB); + return; } - // If all of successor are post dominated then BB is also done. - else if (!successors(BB).empty() && - llvm::all_of(successors(BB), [this](const BasicBlock *Succ) { - return PostDominatedByColdCall.count(Succ); - })) - UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall); - } + + // Otherwise, if the block itself contains a cold function, add it to the + // set of blocks post-dominated by a cold call. + for (auto &I : *BB) + if (const CallInst *CI = dyn_cast(&I)) + if (CI->hasFnAttr(Attribute::Cold)) { + PostDominatedByColdCall.insert(BB); + return; + } } /// Calculate edge weights for successors lead to unreachable. 
@@ -998,16 +983,13 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, LLVM_DEBUG(dbgs() << "\n"); } - std::unique_ptr PDT = - std::make_unique(const_cast(F)); - computePostDominatedByUnreachable(F, PDT.get()); - computePostDominatedByColdCall(F, PDT.get()); - // Walk the basic blocks in post-order so that we can build up state about // the successors of a block iteratively. for (auto BB : post_order(&F.getEntryBlock())) { LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n"); + updatePostDominatedByUnreachable(BB); + updatePostDominatedByColdCall(BB); // If there is no at least two successors, no sense to set probability. if (BB->getTerminator()->getNumSuccessors() < 2) continue; diff --git a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll index 8212cc4769045..64e0a82456f11 100644 --- a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll @@ -141,24 +141,6 @@ exit: ret i32 %result } -define i32 @test_cold_loop(i32 %a, i32 %b) { -entry: - %cond1 = icmp eq i32 %a, 42 - br i1 %cond1, label %header, label %exit - -header: - br label %body - -body: - %cond2 = icmp eq i32 %b, 42 - br i1 %cond2, label %header, label %exit -; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00% - -exit: - call void @coldfunc() - ret i32 %b -} - declare i32 @regular_function(i32 %i) define i32 @test_cold_call_sites_with_prof(i32 %a, i32 %b, i1 %flag, i1 %flag2) { diff --git a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll index 6e01afd2cfc82..0566ca16c2f3a 100644 --- a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll @@ -79,32 +79,6 @@ exit: ret i32 %b } -define i32 @test4(i32 %a, i32 %b) { -; CHECK: Printing analysis {{.*}} for function 'test4' -; Make sure we handle loops post-dominated 
by unreachables. -entry: - %cond1 = icmp eq i32 %a, 42 - br i1 %cond1, label %header, label %exit -; CHECK: edge entry -> header probability is 0x00000001 / 0x80000000 = 0.00% -; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] - -header: - br label %body - -body: - %cond2 = icmp eq i32 %a, 42 - br i1 %cond2, label %header, label %abort -; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00% -; CHECK: edge body -> abort probability is 0x40000000 / 0x80000000 = 50.00% - -abort: - call void @abort() noreturn - unreachable - -exit: - ret i32 %b -} - @_ZTIi = external global i8* ; CHECK-LABEL: throwSmallException diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll index 258cc2031ae8b..acc4b7e138118 100644 --- a/llvm/test/CodeGen/X86/block-placement.ll +++ b/llvm/test/CodeGen/X86/block-placement.ll @@ -358,11 +358,11 @@ define void @unnatural_cfg2(i32* %p0, i32 %a0) { ; CHECK: %loop.header ; CHECK: %loop.body1 ; CHECK: %loop.body2 -; CHECK: %loop.body3 -; CHECK: %loop.inner1.begin ; CHECK: %loop.body4 ; CHECK: %loop.inner2.begin ; CHECK: %loop.inner2.begin +; CHECK: %loop.body3 +; CHECK: %loop.inner1.begin ; CHECK: %bail entry: diff --git a/llvm/test/CodeGen/X86/pr37916.ll b/llvm/test/CodeGen/X86/pr37916.ll index 484104da9ff47..2da9413a9a0cf 100644 --- a/llvm/test/CodeGen/X86/pr37916.ll +++ b/llvm/test/CodeGen/X86/pr37916.ll @@ -7,6 +7,7 @@ define void @fn1() local_unnamed_addr { ; CHECK-LABEL: fn1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_1: # %if.end ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movl a+4, %eax diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll index 92708d33924f0..9238ab0bf89f7 100644 --- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll +++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll @@ -29,8 +29,8 @@ define i8* @SyFgets(i8* %line, i64 
%length, i64 %fid) { ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 56 -; CHECK-NEXT: subq $552, %rsp ## imm = 0x228 -; CHECK-NEXT: .cfi_def_cfa_offset 608 +; CHECK-NEXT: subq $536, %rsp ## imm = 0x218 +; CHECK-NEXT: .cfi_def_cfa_offset 592 ; CHECK-NEXT: .cfi_offset %rbx, -56 ; CHECK-NEXT: .cfi_offset %r12, -48 ; CHECK-NEXT: .cfi_offset %r13, -40 @@ -54,7 +54,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je LBB0_55 ; CHECK-NEXT: LBB0_4: ## %cleanup -; CHECK-NEXT: addq $552, %rsp ## imm = 0x228 +; CHECK-NEXT: addq $536, %rsp ## imm = 0x218 ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 ; CHECK-NEXT: popq %r13 @@ -68,7 +68,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: je LBB0_55 ; CHECK-NEXT: ## %bb.6: ## %SyTime.exit2720 ; CHECK-NEXT: movq %rdx, %rbx -; CHECK-NEXT: movq %rdi, %r14 +; CHECK-NEXT: movq %rdi, %rbp ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: cmpq %rax, %rcx @@ -78,10 +78,10 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: movl $32, %esi ; CHECK-NEXT: callq _memset ; CHECK-NEXT: LBB0_8: ## %while.body.preheader +; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: imulq $1040, %rbx, %rax ## imm = 0x410 ; CHECK-NEXT: movq _syBuf@{{.*}}(%rip), %rcx -; CHECK-NEXT: leaq 8(%rcx,%rax), %rax -; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: leaq 8(%rcx,%rax), %rbx ; CHECK-NEXT: movl $1, %r15d ; CHECK-NEXT: movq _syCTRO@{{.*}}(%rip), %rax ; CHECK-NEXT: movb $1, %cl @@ -92,70 +92,69 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: jne LBB0_9 ; CHECK-NEXT: ## %bb.10: ## %do.end -; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: testb %bpl, %bpl +; CHECK-NEXT: xorl %r14d, %r14d +; CHECK-NEXT: testb %r14b, %r14b ; CHECK-NEXT: jne LBB0_11 ; 
CHECK-NEXT: ## %bb.12: ## %while.body200.preheader -; CHECK-NEXT: xorl %ebx, %ebx -; CHECK-NEXT: leaq {{.*}}(%rip), %r13 -; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill -; CHECK-NEXT: xorl %r12d, %r12d -; CHECK-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: leaq {{.*}}(%rip), %rsi +; CHECK-NEXT: leaq {{.*}}(%rip), %rdi +; CHECK-NEXT: xorl %ebp, %ebp +; CHECK-NEXT: xorl %r13d, %r13d ; CHECK-NEXT: jmp LBB0_13 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_20: ## %sw.bb256 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl %ebp, %r12d +; CHECK-NEXT: movl %r14d, %r13d ; CHECK-NEXT: LBB0_21: ## %while.cond197.backedge ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: decl %r15d ; CHECK-NEXT: testl %r15d, %r15d -; CHECK-NEXT: movl %r12d, %ebp +; CHECK-NEXT: movl %r13d, %r14d ; CHECK-NEXT: jle LBB0_22 ; CHECK-NEXT: LBB0_13: ## %while.body200 ; CHECK-NEXT: ## =>This Loop Header: Depth=1 ; CHECK-NEXT: ## Child Loop BB0_30 Depth 2 ; CHECK-NEXT: ## Child Loop BB0_38 Depth 2 -; CHECK-NEXT: leal -268(%rbp), %eax +; CHECK-NEXT: leal -268(%r14), %eax ; CHECK-NEXT: cmpl $105, %eax ; CHECK-NEXT: ja LBB0_14 ; CHECK-NEXT: ## %bb.56: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movslq (%r13,%rax,4), %rax -; CHECK-NEXT: addq %r13, %rax +; CHECK-NEXT: movslq (%rdi,%rax,4), %rax +; CHECK-NEXT: addq %rdi, %rax ; CHECK-NEXT: jmpq *%rax ; CHECK-NEXT: LBB0_44: ## %while.cond1037.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: movl %ebp, %r12d +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: movl %r14d, %r13d ; CHECK-NEXT: jne LBB0_21 ; CHECK-NEXT: jmp LBB0_55 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_14: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: leal 1(%rbp), %eax +; CHECK-NEXT: leal 1(%r14), %eax ; CHECK-NEXT: cmpl $21, %eax ; CHECK-NEXT: ja 
LBB0_20 ; CHECK-NEXT: ## %bb.15: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $-1, %r12d -; CHECK-NEXT: leaq {{.*}}(%rip), %rcx -; CHECK-NEXT: movslq (%rcx,%rax,4), %rax -; CHECK-NEXT: addq %rcx, %rax +; CHECK-NEXT: movl $-1, %r13d +; CHECK-NEXT: movslq (%rsi,%rax,4), %rax +; CHECK-NEXT: addq %rsi, %rax ; CHECK-NEXT: jmpq *%rax ; CHECK-NEXT: LBB0_18: ## %while.cond201.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $1, %r12d +; CHECK-NEXT: movl $1, %r13d ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_26: ## %sw.bb474 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: ## implicit-def: $r14 +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: ## implicit-def: $r12 ; CHECK-NEXT: jne LBB0_34 ; CHECK-NEXT: ## %bb.27: ## %do.body479.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: ## implicit-def: $r14 +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: ## implicit-def: $r12 ; CHECK-NEXT: jne LBB0_34 ; CHECK-NEXT: ## %bb.28: ## %land.rhs485.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 @@ -166,8 +165,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_32: ## %do.body479.backedge ; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 -; CHECK-NEXT: leaq 1(%r14), %rax -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: leaq 1(%r12), %rax +; CHECK-NEXT: testb %dl, %dl ; CHECK-NEXT: je LBB0_33 ; CHECK-NEXT: ## %bb.29: ## %land.rhs485 ; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 @@ -176,14 +175,15 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: LBB0_30: ## %cond.true.i.i2780 ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 -; CHECK-NEXT: movq %rax, %r14 -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: movq %rax, %r12 +; CHECK-NEXT: testb %dl, %dl ; CHECK-NEXT: jne LBB0_32 ; CHECK-NEXT: ## %bb.31: ## 
%lor.rhs500 ; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 ; CHECK-NEXT: movl $256, %esi ## imm = 0x100 ; CHECK-NEXT: callq ___maskrune -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: testb %dl, %dl ; CHECK-NEXT: jne LBB0_32 ; CHECK-NEXT: jmp LBB0_34 ; CHECK-NEXT: LBB0_45: ## %sw.bb1134 @@ -193,23 +193,23 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: cmpq %rax, %rcx ; CHECK-NEXT: jb LBB0_55 ; CHECK-NEXT: ## %bb.46: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill -; CHECK-NEXT: movl $268, %r12d ## imm = 0x10C +; CHECK-NEXT: xorl %ebp, %ebp +; CHECK-NEXT: movl $268, %r13d ## imm = 0x10C ; CHECK-NEXT: jmp LBB0_21 -; CHECK-NEXT: LBB0_40: ## %sw.bb566 +; CHECK-NEXT: LBB0_19: ## %sw.bb243 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $20, %r12d +; CHECK-NEXT: movl $2, %r13d ; CHECK-NEXT: jmp LBB0_21 -; CHECK-NEXT: LBB0_19: ## %sw.bb243 +; CHECK-NEXT: LBB0_40: ## %sw.bb566 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $2, %r12d +; CHECK-NEXT: movl $20, %r13d ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: LBB0_33: ## %if.end517.loopexitsplit ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: incq %r14 +; CHECK-NEXT: incq %r12 ; CHECK-NEXT: LBB0_34: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: leal -324(%r12), %eax +; CHECK-NEXT: leal -324(%r13), %eax ; CHECK-NEXT: cmpl $59, %eax ; CHECK-NEXT: ja LBB0_35 ; CHECK-NEXT: ## %bb.57: ## %if.end517 @@ -219,11 +219,11 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: jb LBB0_38 ; CHECK-NEXT: LBB0_35: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: cmpl $11, %r12d +; CHECK-NEXT: cmpl $11, %r13d ; CHECK-NEXT: je LBB0_38 ; CHECK-NEXT: ## %bb.36: ## %if.end517 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: cmpl $24, %r12d +; CHECK-NEXT: cmpl $24, %r13d ; CHECK-NEXT: je LBB0_38 
; CHECK-NEXT: ## %bb.37: ## %if.then532 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 @@ -233,14 +233,15 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: LBB0_38: ## %for.cond534 ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 -; CHECK-NEXT: testb %bl, %bl +; CHECK-NEXT: testb %dl, %dl ; CHECK-NEXT: jne LBB0_38 ; CHECK-NEXT: ## %bb.39: ## %for.cond542.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: movb $0, (%r14) -; CHECK-NEXT: movl %ebp, %r12d -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Reload +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: movb $0, (%r12) +; CHECK-NEXT: movl %r14d, %r13d +; CHECK-NEXT: leaq {{.*}}(%rip), %rsi +; CHECK-NEXT: leaq {{.*}}(%rip), %rdi ; CHECK-NEXT: jmp LBB0_21 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_42: ## %while.cond864 @@ -255,44 +256,30 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: jmp LBB0_25 ; CHECK-NEXT: LBB0_11: -; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill -; CHECK-NEXT: xorl %r12d, %r12d +; CHECK-NEXT: xorl %ebp, %ebp +; CHECK-NEXT: xorl %r13d, %r13d ; CHECK-NEXT: LBB0_22: ## %while.end1465 -; CHECK-NEXT: incl %r12d -; CHECK-NEXT: cmpl $16, %r12d +; CHECK-NEXT: incl %r13d +; CHECK-NEXT: cmpl $16, %r13d ; CHECK-NEXT: ja LBB0_50 ; CHECK-NEXT: ## %bb.23: ## %while.end1465 ; CHECK-NEXT: movl $83969, %eax ## imm = 0x14801 -; CHECK-NEXT: btl %r12d, %eax +; CHECK-NEXT: btl %r13d, %eax ; CHECK-NEXT: jae LBB0_50 ; CHECK-NEXT: ## %bb.24: -; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: xorl %ebp, %ebp +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload ; CHECK-NEXT: LBB0_48: ## %if.then1477 ; CHECK-NEXT: movl $1, %edx ; CHECK-NEXT: callq _write -; CHECK-NEXT: subq %rbx, %r14 +; CHECK-NEXT: subq %rbp, %rbx ; CHECK-NEXT: movq _syHistory@{{.*}}(%rip), %rax -; CHECK-NEXT: 
leaq 8189(%r14,%rax), %rax +; CHECK-NEXT: leaq 8189(%rbx,%rax), %rax ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_49: ## %for.body1723 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: decq %rax ; CHECK-NEXT: jmp LBB0_49 -; CHECK-NEXT: LBB0_47: ## %if.then1477.loopexit -; CHECK-NEXT: movq %r14, %rbx -; CHECK-NEXT: jmp LBB0_48 -; CHECK-NEXT: LBB0_16: ## %while.cond635.preheader -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: je LBB0_41 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_17: ## %for.body643.us -; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: jmp LBB0_17 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_41: ## %while.cond661 -; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: jmp LBB0_41 ; CHECK-NEXT: LBB0_50: ## %for.cond1480.preheader ; CHECK-NEXT: movl $512, %eax ## imm = 0x200 ; CHECK-NEXT: cmpq %rax, %rax @@ -302,15 +289,14 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne LBB0_54 ; CHECK-NEXT: ## %bb.52: ## %while.body1679.preheader -; CHECK-NEXT: incl {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill +; CHECK-NEXT: incl %ebp +; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_53: ## %while.body1679 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload -; CHECK-NEXT: movq (%rax), %rdi +; CHECK-NEXT: movq (%rbx), %rdi ; CHECK-NEXT: callq _fileno -; CHECK-NEXT: movslq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 4-byte Folded Reload -; CHECK-NEXT: leal 1(%rax), %ecx -; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movslq %ebp, %rax +; CHECK-NEXT: leal 1(%rax), %ebp ; CHECK-NEXT: cmpq %rax, %rax ; CHECK-NEXT: jl LBB0_53 ; CHECK-NEXT: LBB0_54: ## %while.cond1683.preheader @@ -318,6 +304,22 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: LBB0_55: ## %if.then.i ; CHECK-NEXT: ud2 
+; CHECK-NEXT: LBB0_47: ## %if.then1477.loopexit +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload +; CHECK-NEXT: movq %rbx, %rbp +; CHECK-NEXT: jmp LBB0_48 +; CHECK-NEXT: LBB0_16: ## %while.cond635.preheader +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: je LBB0_41 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_17: ## %for.body643.us +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jmp LBB0_17 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_41: ## %while.cond661 +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jmp LBB0_41 entry: %sub.ptr.rhs.cast646 = ptrtoint i8* %line to i64 %old = alloca [512 x i8], align 16 From 9283681e168141bab9a883e48ce1da80b86afca3 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 27 Nov 2019 11:11:41 -0800 Subject: [PATCH 133/591] [CriticalAntiDepBreaker] Teach the regmask clobber check to check if any subregister is preserved before considering the super register clobbered X86 has some calling conventions where bits 127:0 of a vector register are callee saved, but the upper bits aren't. Previously we could detect that the full ymm register was clobbered when the xmm portion was really preserved. This patch checks the subregisters to make sure they aren't preserved. 
Fixes PR44140 Differential Revision: https://reviews.llvm.org/D70699 --- llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp | 16 +++++++++++++--- llvm/test/CodeGen/X86/pr44140.ll | 7 +++---- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index 702e7e244bcec..8d9d48402b311 100644 --- a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -261,15 +261,25 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) { for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); - if (MO.isRegMask()) - for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) - if (MO.clobbersPhysReg(i)) { + if (MO.isRegMask()) { + auto ClobbersPhysRegAndSubRegs = [&](unsigned PhysReg) { + for (MCSubRegIterator SRI(PhysReg, TRI, true); SRI.isValid(); ++SRI) + if (!MO.clobbersPhysReg(*SRI)) + return false; + + return true; + }; + + for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) { + if (ClobbersPhysRegAndSubRegs(i)) { DefIndices[i] = Count; KillIndices[i] = ~0u; KeepRegs.reset(i); Classes[i] = nullptr; RegRefs.erase(i); } + } + } if (!MO.isReg()) continue; Register Reg = MO.getReg(); diff --git a/llvm/test/CodeGen/X86/pr44140.ll b/llvm/test/CodeGen/X86/pr44140.ll index 9916252e6c499..941f45d2d99a2 100644 --- a/llvm/test/CodeGen/X86/pr44140.ll +++ b/llvm/test/CodeGen/X86/pr44140.ll @@ -10,7 +10,6 @@ define win64cc void @opaque() { ; We need xmm6 to be live from the loop header across all iterations of the loop. ; We shouldn't clobber ymm6 inside the loop. 
-; FIXME: We currently clobber ymm6 define i32 @main() { ; CHECK-LABEL: main: ; CHECK: # %bb.0: # %start @@ -23,7 +22,7 @@ define i32 @main() { ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1 -; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm6 +; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm7 ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm2 ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm3 ; CHECK-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) @@ -31,10 +30,10 @@ define i32 @main() { ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm1 ; CHECK-NEXT: vmovups %ymm3, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovups %ymm2, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovups %ymm6, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovups %ymm3, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovups %ymm2, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovups %ymm6, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovups %ymm7, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm5 From f584f04dab69ab15c8942753a145f0c6e7693bcc Mon Sep 17 00:00:00 2001 From: Nandor Licker Date: Mon, 11 Nov 2019 11:13:34 +0000 Subject: [PATCH 134/591] [ConstExprPreter] Removed the flag forcing the use of the interpreter Summary: Removed the ```-fforce-experimental-new-constant-interpreter flag```, leaving only the ```-fexperimental-new-constant-interpreter``` one. The interpreter now always emits an error on an unsupported feature. Allowing the interpreter to bail out would require a mapping from APValue to interpreter memory, which will not be necessary in the final version. It is more sensible to always emit an error if the interpreter fails. 
Reviewers: jfb, Bigcheese, rsmith, dexonsmith Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D70071 --- clang/docs/ConstantInterpreter.rst | 3 +- clang/include/clang/Basic/LangOptions.def | 2 - clang/include/clang/Driver/Options.td | 2 - clang/lib/AST/ExprConstant.cpp | 118 +++++++++------------- clang/lib/AST/Interp/Context.cpp | 64 ++++-------- clang/lib/AST/Interp/Context.h | 24 +---- clang/lib/Driver/ToolChains/Clang.cpp | 3 - clang/lib/Frontend/CompilerInvocation.cpp | 2 - clang/test/AST/Interp/cond.cpp | 2 +- 9 files changed, 74 insertions(+), 146 deletions(-) diff --git a/clang/docs/ConstantInterpreter.rst b/clang/docs/ConstantInterpreter.rst index d4fb8f6f34aa8..a86161c8fa011 100644 --- a/clang/docs/ConstantInterpreter.rst +++ b/clang/docs/ConstantInterpreter.rst @@ -10,8 +10,7 @@ Introduction The constexpr interpreter aims to replace the existing tree evaluator in clang, improving performance on constructs which are executed inefficiently by the evaluator. 
The interpreter is activated using the following flags: -* ``-fexperimental-new-constant-interpreter`` enables the interpreter, falling back to the evaluator for unsupported features -* ``-fforce-experimental-new-constant-interpreter`` forces the use of the interpreter, bailing out if an unsupported feature is encountered +* ``-fexperimental-new-constant-interpreter`` enables the interpreter, emitting an error if an unsupported feature is encountered Bytecode Compilation ==================== diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 82bf379af9097..68d6ee1dce423 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -297,8 +297,6 @@ BENIGN_LANGOPT(ConstexprStepLimit, 32, 1048576, "maximum constexpr evaluation steps") BENIGN_LANGOPT(EnableNewConstInterp, 1, 0, "enable the experimental new constant interpreter") -BENIGN_LANGOPT(ForceNewConstInterp, 1, 0, - "force the use of the experimental new constant interpreter") BENIGN_LANGOPT(BracketDepth, 32, 256, "maximum bracket nesting depth") BENIGN_LANGOPT(NumLargeByValueCopy, 32, 0, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 2d501c09c7628..daba98a39dab5 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -850,8 +850,6 @@ def fconstexpr_depth_EQ : Joined<["-"], "fconstexpr-depth=">, Group; def fconstexpr_steps_EQ : Joined<["-"], "fconstexpr-steps=">, Group; def fexperimental_new_constant_interpreter : Flag<["-"], "fexperimental-new-constant-interpreter">, Group, HelpText<"Enable the experimental new constant interpreter">, Flags<[CC1Option]>; -def fforce_experimental_new_constant_interpreter : Flag<["-"], "fforce-experimental-new-constant-interpreter">, Group, - HelpText<"Force the use of the experimental new constant interpreter, failing on missing features">, Flags<[CC1Option]>; def 
fconstexpr_backtrace_limit_EQ : Joined<["-"], "fconstexpr-backtrace-limit=">, Group; def fno_crash_diagnostics : Flag<["-"], "fno-crash-diagnostics">, Group, Flags<[NoArgumentUnused, CoreOption]>, diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index eec9bbdaef806..df80cb4f94402 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -763,11 +763,8 @@ namespace { /// we will evaluate. unsigned StepsLeft; - /// Force the use of the experimental new constant interpreter, bailing out - /// with an error if a feature is not supported. - bool ForceNewConstInterp; - - /// Enable the experimental new constant interpreter. + /// Enable the experimental new constant interpreter. If an expression is + /// not supported by the interpreter, an error is triggered. bool EnableNewConstInterp; /// BottomFrame - The frame in which evaluation started. This must be @@ -922,9 +919,7 @@ namespace { : Ctx(const_cast(C)), EvalStatus(S), CurrentCall(nullptr), CallStackDepth(0), NextCallIndex(1), StepsLeft(C.getLangOpts().ConstexprStepLimit), - ForceNewConstInterp(C.getLangOpts().ForceNewConstInterp), - EnableNewConstInterp(ForceNewConstInterp || - C.getLangOpts().EnableNewConstInterp), + EnableNewConstInterp(C.getLangOpts().EnableNewConstInterp), BottomFrame(*this, SourceLocation(), nullptr, nullptr, nullptr), EvaluatingDecl((const ValueDecl *)nullptr), EvaluatingDeclValue(nullptr), HasActiveDiagnostic(false), @@ -13400,32 +13395,25 @@ static bool EvaluateInPlace(APValue &Result, EvalInfo &Info, const LValue &This, /// EvaluateAsRValue - Try to evaluate this expression, performing an implicit /// lvalue-to-rvalue cast if it is an lvalue. 
static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result) { - if (Info.EnableNewConstInterp) { - auto &InterpCtx = Info.Ctx.getInterpContext(); - switch (InterpCtx.evaluateAsRValue(Info, E, Result)) { - case interp::InterpResult::Success: - return true; - case interp::InterpResult::Fail: + if (Info.EnableNewConstInterp) { + if (!Info.Ctx.getInterpContext().evaluateAsRValue(Info, E, Result)) + return false; + } else { + if (E->getType().isNull()) return false; - case interp::InterpResult::Bail: - break; - } - } - - if (E->getType().isNull()) - return false; - - if (!CheckLiteralType(Info, E)) - return false; - if (!::Evaluate(Result, Info, E)) - return false; + if (!CheckLiteralType(Info, E)) + return false; - if (E->isGLValue()) { - LValue LV; - LV.setFrom(Info.Ctx, Result); - if (!handleLValueToRValueConversion(Info, E, E->getType(), LV, Result)) + if (!::Evaluate(Result, Info, E)) return false; + + if (E->isGLValue()) { + LValue LV; + LV.setFrom(Info.Ctx, Result); + if (!handleLValueToRValueConversion(Info, E, E->getType(), LV, Result)) + return false; + } } // Check this core constant expression is a constant expression. @@ -13637,46 +13625,36 @@ bool Expr::EvaluateAsInitializer(APValue &Value, const ASTContext &Ctx, if (Info.EnableNewConstInterp) { auto &InterpCtx = const_cast(Ctx).getInterpContext(); - switch (InterpCtx.evaluateAsInitializer(Info, VD, Value)) { - case interp::InterpResult::Fail: - // Bail out if an error was encountered. - return false; - case interp::InterpResult::Success: - // Evaluation succeeded and value was set. - return CheckConstantExpression(Info, DeclLoc, DeclTy, Value); - case interp::InterpResult::Bail: - // Evaluate the value again for the tree evaluator to use. 
- break; + if (!InterpCtx.evaluateAsInitializer(Info, VD, Value)) + return false; + } else { + LValue LVal; + LVal.set(VD); + + // C++11 [basic.start.init]p2: + // Variables with static storage duration or thread storage duration shall + // be zero-initialized before any other initialization takes place. + // This behavior is not present in C. + if (Ctx.getLangOpts().CPlusPlus && !VD->hasLocalStorage() && + !DeclTy->isReferenceType()) { + ImplicitValueInitExpr VIE(DeclTy); + if (!EvaluateInPlace(Value, Info, LVal, &VIE, + /*AllowNonLiteralTypes=*/true)) + return false; } - } - - LValue LVal; - LVal.set(VD); - // C++11 [basic.start.init]p2: - // Variables with static storage duration or thread storage duration shall be - // zero-initialized before any other initialization takes place. - // This behavior is not present in C. - if (Ctx.getLangOpts().CPlusPlus && !VD->hasLocalStorage() && - !DeclTy->isReferenceType()) { - ImplicitValueInitExpr VIE(DeclTy); - if (!EvaluateInPlace(Value, Info, LVal, &VIE, - /*AllowNonLiteralTypes=*/true)) + if (!EvaluateInPlace(Value, Info, LVal, this, + /*AllowNonLiteralTypes=*/true) || + EStatus.HasSideEffects) return false; - } - - if (!EvaluateInPlace(Value, Info, LVal, this, - /*AllowNonLiteralTypes=*/true) || - EStatus.HasSideEffects) - return false; - - // At this point, any lifetime-extended temporaries are completely - // initialized. - Info.performLifetimeExtension(); - if (!Info.discardCleanups()) - llvm_unreachable("Unhandled cleanup; missing full expression marker?"); + // At this point, any lifetime-extended temporaries are completely + // initialized. 
+ Info.performLifetimeExtension(); + if (!Info.discardCleanups()) + llvm_unreachable("Unhandled cleanup; missing full expression marker?"); + } return CheckConstantExpression(Info, DeclLoc, DeclTy, Value) && CheckMemoryLeaks(Info); } @@ -14415,14 +14393,8 @@ bool Expr::isPotentialConstantExpr(const FunctionDecl *FD, // The constexpr VM attempts to compile all methods to bytecode here. if (Info.EnableNewConstInterp) { - auto &InterpCtx = Info.Ctx.getInterpContext(); - switch (InterpCtx.isPotentialConstantExpr(Info, FD)) { - case interp::InterpResult::Success: - case interp::InterpResult::Fail: - return Diags.empty(); - case interp::InterpResult::Bail: - break; - } + Info.Ctx.getInterpContext().isPotentialConstantExpr(Info, FD); + return Diags.empty(); } const CXXMethodDecl *MD = dyn_cast(FD); diff --git a/clang/lib/AST/Interp/Context.cpp b/clang/lib/AST/Interp/Context.cpp index 4f8f7b96e7c32..e7f9ba0f010ae 100644 --- a/clang/lib/AST/Interp/Context.cpp +++ b/clang/lib/AST/Interp/Context.cpp @@ -21,44 +21,37 @@ using namespace clang; using namespace clang::interp; -Context::Context(ASTContext &Ctx) - : Ctx(Ctx), ForceInterp(getLangOpts().ForceNewConstInterp), - P(new Program(*this)) {} +Context::Context(ASTContext &Ctx) : Ctx(Ctx), P(new Program(*this)) {} Context::~Context() {} -InterpResult Context::isPotentialConstantExpr(State &Parent, - const FunctionDecl *FD) { +bool Context::isPotentialConstantExpr(State &Parent, const FunctionDecl *FD) { Function *Func = P->getFunction(FD); if (!Func) { if (auto R = ByteCodeStmtGen(*this, *P).compileFunc(FD)) { Func = *R; - } else if (ForceInterp) { + } else { handleAllErrors(R.takeError(), [&Parent](ByteCodeGenError &Err) { Parent.FFDiag(Err.getLoc(), diag::err_experimental_clang_interp_failed); }); - return InterpResult::Fail; - } else { - consumeError(R.takeError()); - return InterpResult::Bail; + return false; } } if (!Func->isConstexpr()) - return InterpResult::Fail; + return false; APValue Dummy; return Run(Parent, Func, 
Dummy); } -InterpResult Context::evaluateAsRValue(State &Parent, const Expr *E, - APValue &Result) { +bool Context::evaluateAsRValue(State &Parent, const Expr *E, APValue &Result) { ByteCodeExprGen C(*this, *P, Parent, Stk, Result); return Check(Parent, C.interpretExpr(E)); } -InterpResult Context::evaluateAsInitializer(State &Parent, const VarDecl *VD, - APValue &Result) { +bool Context::evaluateAsInitializer(State &Parent, const VarDecl *VD, + APValue &Result) { ByteCodeExprGen C(*this, *P, Parent, Stk, Result); return Check(Parent, C.interpretDecl(VD)); } @@ -116,33 +109,20 @@ unsigned Context::getCharBit() const { return Ctx.getTargetInfo().getCharWidth(); } -InterpResult Context::Run(State &Parent, Function *Func, APValue &Result) { - InterpResult Flag; - { - InterpState State(Parent, *P, Stk, *this); - State.Current = new InterpFrame(State, Func, nullptr, {}, {}); - if (Interpret(State, Result)) { - Flag = InterpResult::Success; - } else { - Flag = InterpResult::Fail; - } - } - - if (Flag != InterpResult::Success) - Stk.clear(); - return Flag; +bool Context::Run(State &Parent, Function *Func, APValue &Result) { + InterpState State(Parent, *P, Stk, *this); + State.Current = new InterpFrame(State, Func, nullptr, {}, {}); + if (Interpret(State, Result)) + return true; + Stk.clear(); + return false; } -InterpResult Context::Check(State &Parent, llvm::Expected &&R) { - if (R) { - return *R ? 
InterpResult::Success : InterpResult::Fail; - } else if (ForceInterp) { - handleAllErrors(R.takeError(), [&Parent](ByteCodeGenError &Err) { - Parent.FFDiag(Err.getLoc(), diag::err_experimental_clang_interp_failed); - }); - return InterpResult::Fail; - } else { - consumeError(R.takeError()); - return InterpResult::Bail; - } +bool Context::Check(State &Parent, llvm::Expected &&Flag) { + if (Flag) + return *Flag; + handleAllErrors(Flag.takeError(), [&Parent](ByteCodeGenError &Err) { + Parent.FFDiag(Err.getLoc(), diag::err_experimental_clang_interp_failed); + }); + return false; } diff --git a/clang/lib/AST/Interp/Context.h b/clang/lib/AST/Interp/Context.h index 96368b6e5f02f..e4d831cbb9912 100644 --- a/clang/lib/AST/Interp/Context.h +++ b/clang/lib/AST/Interp/Context.h @@ -34,16 +34,6 @@ class Program; class State; enum PrimType : unsigned; -/// Wrapper around interpreter termination results. -enum class InterpResult { - /// Interpreter successfully computed a value. - Success, - /// Interpreter encountered an error and quit. - Fail, - /// Interpreter encountered an unimplemented feature, AST fallback. - Bail, -}; - /// Holds all information required to evaluate constexpr code in a module. class Context { public: @@ -54,15 +44,13 @@ class Context { ~Context(); /// Checks if a function is a potential constant expression. - InterpResult isPotentialConstantExpr(State &Parent, - const FunctionDecl *FnDecl); + bool isPotentialConstantExpr(State &Parent, const FunctionDecl *FnDecl); /// Evaluates a toplevel expression as an rvalue. - InterpResult evaluateAsRValue(State &Parent, const Expr *E, APValue &Result); + bool evaluateAsRValue(State &Parent, const Expr *E, APValue &Result); /// Evaluates a toplevel initializer. - InterpResult evaluateAsInitializer(State &Parent, const VarDecl *VD, - APValue &Result); + bool evaluateAsInitializer(State &Parent, const VarDecl *VD, APValue &Result); /// Returns the AST context. 
ASTContext &getASTContext() const { return Ctx; } @@ -78,16 +66,14 @@ class Context { private: /// Runs a function. - InterpResult Run(State &Parent, Function *Func, APValue &Result); + bool Run(State &Parent, Function *Func, APValue &Result); /// Checks a result fromt the interpreter. - InterpResult Check(State &Parent, llvm::Expected &&R); + bool Check(State &Parent, llvm::Expected &&R); private: /// Current compilation context. ASTContext &Ctx; - /// Flag to indicate if the use of the interpreter is mandatory. - bool ForceInterp; /// Interpreter stack, shared across invocations. InterpStack Stk; /// Constexpr program. diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 03ebef550cde7..26d13c7146701 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4503,9 +4503,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Args.hasArg(options::OPT_fexperimental_new_constant_interpreter)) CmdArgs.push_back("-fexperimental-new-constant-interpreter"); - if (Args.hasArg(options::OPT_fforce_experimental_new_constant_interpreter)) - CmdArgs.push_back("-fforce-experimental-new-constant-interpreter"); - if (Arg *A = Args.getLastArg(options::OPT_fbracket_depth_EQ)) { CmdArgs.push_back("-fbracket-depth"); CmdArgs.push_back(A->getValue()); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 56f6653755754..74831e78d8cb9 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -2854,8 +2854,6 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, getLastArgIntValue(Args, OPT_fconstexpr_steps, 1048576, Diags); Opts.EnableNewConstInterp = Args.hasArg(OPT_fexperimental_new_constant_interpreter); - Opts.ForceNewConstInterp = - Args.hasArg(OPT_fforce_experimental_new_constant_interpreter); Opts.BracketDepth = getLastArgIntValue(Args, OPT_fbracket_depth, 256, 
Diags); Opts.DelayedTemplateParsing = Args.hasArg(OPT_fdelayed_template_parsing); Opts.NumLargeByValueCopy = diff --git a/clang/test/AST/Interp/cond.cpp b/clang/test/AST/Interp/cond.cpp index 8a5a318c216d9..1fc69ed333e15 100644 --- a/clang/test/AST/Interp/cond.cpp +++ b/clang/test/AST/Interp/cond.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -std=c++17 -fsyntax-only -fforce-experimental-new-constant-interpreter %s -verify +// RUN: %clang_cc1 -std=c++17 -fsyntax-only -fexperimental-new-constant-interpreter %s -verify // RUN: %clang_cc1 -std=c++17 -fsyntax-only %s -verify // expected-no-diagnostics From dcceab1a0ace424ba4877f53c4c69433e53f1f3f Mon Sep 17 00:00:00 2001 From: Stefan Pintilie Date: Wed, 27 Nov 2019 12:50:23 -0600 Subject: [PATCH 135/591] [PowerPC] Add new Future CPU for PowerPC in LLVM This is a continuation of D70262 The previous patch as listed above added the future CPU in clang. This patch adds the future CPU in the PowerPC backend. At this point the patch simply assumes that a future CPU will have the same characteristics as pwr9. Those characteristics may change with later patches. 
Differential Revision: https://reviews.llvm.org/D70333 --- llvm/lib/Support/Host.cpp | 3 +++ llvm/lib/Target/PowerPC/PPC.td | 12 ++++++++++++ llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 3 ++- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 5 ++++- llvm/lib/Target/PowerPC/PPCSubtarget.h | 1 + llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 6 ++++-- llvm/test/CodeGen/PowerPC/check-cpu.ll | 11 +++++++++++ 7 files changed, 37 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/check-cpu.ll diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index 7e07b8f7ca264..ef38c1c09413a 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -140,6 +140,9 @@ StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) { .Case("POWER8E", "pwr8") .Case("POWER8NVL", "pwr8") .Case("POWER9", "pwr9") + // FIXME: If we get a simulator or machine with the capabilities of + // mcpu=future, we should revisit this and add the name reported by the + // simulator/machine. 
.Default(generic); } diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index de007d3b8d0b2..6fa46f0a7dced 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -51,6 +51,8 @@ def DirectivePwr6x def DirectivePwr7: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR7", "">; def DirectivePwr8: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR8", "">; def DirectivePwr9: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR9", "">; +def DirectivePwrFuture + : SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR_FUTURE", "">; def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", "Enable 64-bit instructions">; @@ -239,6 +241,13 @@ def ProcessorFeatures { FeatureVectorsUseTwoUnits, FeaturePPCPreRASched, FeaturePPCPostRASched]; list Power9FeatureList = !listconcat(Power8FeatureList, Power9SpecificFeatures); + + // For future CPU we assume that all of the existing features from Power 9 + // still exist. + list FutureSpecificFeatures = + []; + list FutureFeatureList = + !listconcat(Power9FeatureList, FutureSpecificFeatures); } // Note: Future features to add when support is extended to more @@ -441,6 +450,9 @@ def : ProcessorModel<"pwr6x", G5Model, def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>; def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>; def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.Power9FeatureList>; +// No scheduler model for future CPU. 
+def : ProcessorModel<"future", NoSchedModel, + ProcessorFeatures.FutureFeatureList>; def : Processor<"ppc", G3Itineraries, [Directive32, FeatureHardFloat, FeatureMFTB]>; def : Processor<"ppc32", G3Itineraries, [Directive32, FeatureHardFloat, diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 53dbb02bb8e4b..33198efb05f8d 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1603,7 +1603,8 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { // FIXME: why is power8 missing here? "ppc64", "ppc64le", - "power9" + "power9", + "future" }; // Get the numerically largest directive. diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 3c59cea7f96e2..8730c88f43531 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1217,6 +1217,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, case PPC::DIR_PWR7: case PPC::DIR_PWR8: case PPC::DIR_PWR9: + case PPC::DIR_PWR_FUTURE: setPrefLoopAlignment(Align(16)); setPrefFunctionAlignment(Align(16)); break; @@ -14204,7 +14205,8 @@ Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { case PPC::DIR_PWR6X: case PPC::DIR_PWR7: case PPC::DIR_PWR8: - case PPC::DIR_PWR9: { + case PPC::DIR_PWR9: + case PPC::DIR_PWR_FUTURE: { if (!ML) break; @@ -15383,6 +15385,7 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const { // vector 7 2 2 return true; case PPC::DIR_PWR9: + case PPC::DIR_PWR_FUTURE: // type mul add shl // scalar 5 2 2 // vector 7 2 2 diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h index dcf64a5d6f9b8..7266d82a08b54 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -57,6 +57,7 @@ namespace PPC { DIR_PWR7, DIR_PWR8, DIR_PWR9, + DIR_PWR_FUTURE, DIR_64 }; } diff 
--git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 380d718885251..7079498cd815e 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -651,8 +651,9 @@ unsigned PPCTTIImpl::getCacheLineSize() const { // On P7, P8 or P9 we have a cache line size of 128. unsigned Directive = ST->getCPUDirective(); + // Assume that Future CPU has the same cache line size as the others. if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 || - Directive == PPC::DIR_PWR9) + Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR_FUTURE) return 128; // On other processors return a default of 64 bytes. @@ -684,8 +685,9 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) { // For P7 and P8, floating-point instructions have a 6-cycle latency and // there are two execution units, so unroll by 12x for latency hiding. // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready + // Assume that future is the same as the others. if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 || - Directive == PPC::DIR_PWR9) + Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR_FUTURE) return 12; // For most things, modern systems have two execution units (and diff --git a/llvm/test/CodeGen/PowerPC/check-cpu.ll b/llvm/test/CodeGen/PowerPC/check-cpu.ll new file mode 100644 index 0000000000000..baa39024ebe8d --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/check-cpu.ll @@ -0,0 +1,11 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=future < %s | FileCheck %s + + +; Test mcpu=future that should be recognized on PowerPC. 
+ +; CHECK-NOT: is not a recognized processor for this target +; CHECK: .text + From 340e7c0b77a7037afefe7255503afe362967b577 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Wed, 27 Nov 2019 12:34:36 -0800 Subject: [PATCH 136/591] build: avoid hardcoding the libxml2 library name FindLibXml2 will set the LIBXML2_LIBRARIES variable to the libraries that we must link against. This will be an empty string if libxml2 is not found. Avoid hardcoding the library name as xml2 in the configuration. Simplify the usage in the WindowsManifest library. --- llvm/cmake/config-ix.cmake | 1 - llvm/lib/WindowsManifest/CMakeLists.txt | 12 +++--------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake index 028a2cc86bf38..e7e5e5dcf2ff3 100644 --- a/llvm/cmake/config-ix.cmake +++ b/llvm/cmake/config-ix.cmake @@ -166,7 +166,6 @@ if(NOT LLVM_USE_SANITIZER MATCHES "Memory.*") else() include_directories(${LIBXML2_INCLUDE_DIR}) endif() - set(LIBXML2_LIBS "xml2") endif() endif() endif() diff --git a/llvm/lib/WindowsManifest/CMakeLists.txt b/llvm/lib/WindowsManifest/CMakeLists.txt index 4f2d011d54348..8868564da76f3 100644 --- a/llvm/lib/WindowsManifest/CMakeLists.txt +++ b/llvm/lib/WindowsManifest/CMakeLists.txt @@ -1,10 +1,3 @@ -set(system_libs) -if( CMAKE_HOST_UNIX ) - if( LLVM_LIBXML2_ENABLED ) - set(system_libs ${system_libs} ${LIBXML2_LIBS}) - endif() -endif() - add_llvm_component_library(LLVMWindowsManifest WindowsManifestMerger.cpp @@ -12,7 +5,8 @@ add_llvm_component_library(LLVMWindowsManifest ${LLVM_MAIN_INCLUDE_DIR}/llvm/WindowsManifest ${Backtrace_INCLUDE_DIRS} - LINK_LIBS ${system_libs} + LINK_LIBS ${LIBXML2_LIBRARIES} ) -set_property(TARGET LLVMWindowsManifest PROPERTY LLVM_SYSTEM_LIBS "${system_libs}") +set_property(TARGET LLVMWindowsManifest PROPERTY + LLVM_SYSTEM_LIBS ${LIBXML2_LIBRARIES}) From cfcfd8a056eb7c01bc76b745ce9f7839f0dcbc42 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Wed, 27 Nov 
2019 12:55:46 -0800 Subject: [PATCH 137/591] build: avoid cached literals being linked against If the value of the LibXml2 search is cached, it can cause an errant link against LIBXML2_LIBRARIES-NOTFOUND if libxml2 is not found. Add a guard against this. Should repair the build bots. --- llvm/lib/WindowsManifest/CMakeLists.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/WindowsManifest/CMakeLists.txt b/llvm/lib/WindowsManifest/CMakeLists.txt index 8868564da76f3..fe6ddcd414d56 100644 --- a/llvm/lib/WindowsManifest/CMakeLists.txt +++ b/llvm/lib/WindowsManifest/CMakeLists.txt @@ -3,10 +3,10 @@ add_llvm_component_library(LLVMWindowsManifest ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/WindowsManifest - ${Backtrace_INCLUDE_DIRS} + ${Backtrace_INCLUDE_DIRS}) - LINK_LIBS ${LIBXML2_LIBRARIES} - ) - -set_property(TARGET LLVMWindowsManifest PROPERTY - LLVM_SYSTEM_LIBS ${LIBXML2_LIBRARIES}) +if(LIBXML2_LIBRARIES) + target_link_libraries(LLVMWindowsManifest PUBLIC ${LIBXML2_LIBRARIES}) + set_property(TARGET LLVMWindowsManifest PROPERTY + LLVM_SYSTEM_LIBS ${LIBXML2_LIBRARIES}) +endif() From 7ca7d62c6ea1680ec0a1861083669596547fdd6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= Date: Tue, 26 Nov 2019 20:50:56 +0100 Subject: [PATCH 138/591] [Attributor] Move pass after InstCombine to further eliminate null pointer checks Summary: PR44149 Reviewers: jdoerfert Subscribers: mehdi_amini, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70737 --- .../lib/Transforms/IPO/PassManagerBuilder.cpp | 5 +- .../PhaseOrdering/null-check-elim.ll | 47 +++++++++++++++++++ 2 files changed, 49 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Transforms/PhaseOrdering/null-check-elim.ll diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 81424229c3bfe..db25e617237b8 100644 --- 
a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -530,9 +530,6 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createIPSCCPPass()); // IP SCCP MPM.add(createCalledValuePropagationPass()); - // Infer attributes on declarations, call sites, arguments, etc. - MPM.add(createAttributorLegacyPass()); - MPM.add(createGlobalOptimizerPass()); // Optimize out global vars // Promote any localized global vars. MPM.add(createPromoteMemoryToRegisterPass()); @@ -540,6 +537,8 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createDeadArgEliminationPass()); // Dead argument elimination addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE + // Infer attributes on declarations, call sites, arguments, etc. + MPM.add(createAttributorLegacyPass()); addExtensionsToPM(EP_Peephole, MPM); MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE diff --git a/llvm/test/Transforms/PhaseOrdering/null-check-elim.ll b/llvm/test/Transforms/PhaseOrdering/null-check-elim.ll new file mode 100644 index 0000000000000..4c0604512d1f4 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/null-check-elim.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -O3 -attributor-disable=false -S < %s | FileCheck %s --check-prefixes=ANY,OLDPM +; RUN: opt -passes='default' -attributor-disable=false -S < %s | FileCheck %s --check-prefixes=ANY,NEWPM + +@p = external global [2 x i8*], align 16 + +define void @test(i8* %arg, i32 %arg1) { +; OLDPM-LABEL: @test( +; OLDPM-NEXT: bb5: +; OLDPM-NEXT: [[TMP:%.*]] = tail call i8* @strchr(i8* nofree nonnull dereferenceable(1) [[ARG:%.*]], i32 [[ARG1:%.*]]) #1 +; OLDPM-NEXT: store i8* [[TMP]], i8** getelementptr inbounds ([2 x i8*], [2 x i8*]* @p, i64 0, i64 0), align 16 +; OLDPM-NEXT: [[TMP4:%.*]] = tail call i8* @foo(i8* nonnull [[ARG]]) +; OLDPM-NEXT: store i8* [[TMP4]], i8** getelementptr inbounds ([2 x i8*], [2 x 
i8*]* @p, i64 0, i64 1), align 8 +; OLDPM-NEXT: ret void +; +; NEWPM-LABEL: @test( +; NEWPM-NEXT: bb: +; NEWPM-NEXT: [[TMP:%.*]] = tail call i8* @strchr(i8* nonnull dereferenceable(1) [[ARG:%.*]], i32 [[ARG1:%.*]]) +; NEWPM-NEXT: store i8* [[TMP]], i8** getelementptr inbounds ([2 x i8*], [2 x i8*]* @p, i64 0, i64 0), align 16 +; NEWPM-NEXT: [[TMP2:%.*]] = icmp eq i8* [[ARG]], null +; NEWPM-NEXT: br i1 [[TMP2]], label [[BB5:%.*]], label [[BB3:%.*]] +; NEWPM: bb3: +; NEWPM-NEXT: [[TMP4:%.*]] = tail call i8* @foo(i8* nonnull [[ARG]]) +; NEWPM-NEXT: br label [[BB5]] +; NEWPM: bb5: +; NEWPM-NEXT: [[TMP6:%.*]] = phi i8* [ [[TMP4]], [[BB3]] ], [ null, [[BB:%.*]] ] +; NEWPM-NEXT: store i8* [[TMP6]], i8** getelementptr inbounds ([2 x i8*], [2 x i8*]* @p, i64 0, i64 1), align 8 +; NEWPM-NEXT: ret void +; +bb: + %tmp = tail call i8* @strchr(i8* %arg, i32 %arg1) + store i8* %tmp, i8** getelementptr inbounds ([2 x i8*], [2 x i8*]* @p, i64 0, i64 0), align 16 + %tmp2 = icmp eq i8* %arg, null + br i1 %tmp2, label %bb5, label %bb3 + +bb3: ; preds = %bb + %tmp4 = tail call i8* @foo(i8* %arg) + br label %bb5 + +bb5: ; preds = %bb3, %bb + %tmp6 = phi i8* [ %tmp4, %bb3 ], [ null, %bb ] + store i8* %tmp6, i8** getelementptr inbounds ([2 x i8*], [2 x i8*]* @p, i64 0, i64 1), align 8 + ret void +} + +declare i8* @strchr(i8*, i32) +declare i8* @foo(i8*) From 549ff601f053303356abe7d8fca8fbcf5e3502e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= Date: Wed, 27 Nov 2019 22:36:29 +0100 Subject: [PATCH 139/591] Try to reenable -Wdeprecated-copy under -Wextra --- clang/include/clang/Basic/DiagnosticGroups.td | 1 + clang/test/SemaCXX/deprecated-copy.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 9f5900f5bec85..5bfb3de86a477 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -816,6 +816,7 @@ def Move : 
DiagGroup<"move", [ ]>; def Extra : DiagGroup<"extra", [ + DeprecatedCopy, MissingFieldInitializers, IgnoredQualifiers, InitializerOverrides, diff --git a/clang/test/SemaCXX/deprecated-copy.cpp b/clang/test/SemaCXX/deprecated-copy.cpp index c2ab3c40bbae6..4d3e798d912ba 100644 --- a/clang/test/SemaCXX/deprecated-copy.cpp +++ b/clang/test/SemaCXX/deprecated-copy.cpp @@ -1,5 +1,6 @@ // RUN: %clang_cc1 -std=c++11 %s -Wdeprecated-copy -verify // RUN: %clang_cc1 -std=c++11 %s -Wdeprecated-copy-dtor -DDEPRECATED_COPY_DTOR -verify +// RUN: %clang_cc1 -std=c++11 %s -Wextra -verify #ifdef DEPRECATED_COPY_DTOR struct A { From 8e84c9ae99846c91c4e9828f1945c200d26d2fb9 Mon Sep 17 00:00:00 2001 From: Stefan Pintilie Date: Wed, 27 Nov 2019 15:38:05 -0600 Subject: [PATCH 140/591] [PowerPC] Separate Features that are known to be Power9 specific from Future CPU The Power 9 CPU has some features that are unlikely to be passed on to future versions of the CPU. This patch separates this out so that future CPU does not inherit them. 
Differential Revision: https://reviews.llvm.org/D70466 --- llvm/lib/Target/PowerPC/PPC.td | 17 +++++++++++++---- .../CostModel/PowerPC/future-cost-model.ll | 16 ++++++++++++++++ 2 files changed, 29 insertions(+), 4 deletions(-) create mode 100644 llvm/test/Analysis/CostModel/PowerPC/future-cost-model.ll diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index 6fa46f0a7dced..a83bfc5abd7dd 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -237,13 +237,22 @@ def ProcessorFeatures { list Power8FeatureList = !listconcat(Power7FeatureList, Power8SpecificFeatures); list Power9SpecificFeatures = - [DirectivePwr9, FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0, - FeatureVectorsUseTwoUnits, FeaturePPCPreRASched, FeaturePPCPostRASched]; + [DirectivePwr9, FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0]; + + // Some features are unique to Power9 and there is no reason to assume + // they will be part of any future CPUs. One example is the narrower + // dispatch for vector operations than scalar ones. For the time being, + // this list also includes scheduling-related features since we do not have + // enough info to create custom scheduling strategies for future CPUs. + list Power9OnlyFeatures = + [FeatureVectorsUseTwoUnits, FeaturePPCPreRASched, FeaturePPCPostRASched]; list Power9FeatureList = !listconcat(Power8FeatureList, Power9SpecificFeatures); + list Power9ImplList = + !listconcat(Power9FeatureList, Power9OnlyFeatures); // For future CPU we assume that all of the existing features from Power 9 - // still exist. + // still exist with the exception of those we know are Power 9 specific. 
list FutureSpecificFeatures = []; list FutureFeatureList = @@ -449,7 +458,7 @@ def : ProcessorModel<"pwr6x", G5Model, FeatureMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>; def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>; -def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.Power9FeatureList>; +def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.Power9ImplList>; // No scheduler model for future CPU. def : ProcessorModel<"future", NoSchedModel, ProcessorFeatures.FutureFeatureList>; diff --git a/llvm/test/Analysis/CostModel/PowerPC/future-cost-model.ll b/llvm/test/Analysis/CostModel/PowerPC/future-cost-model.ll new file mode 100644 index 0000000000000..3e4fb82e600c6 --- /dev/null +++ b/llvm/test/Analysis/CostModel/PowerPC/future-cost-model.ll @@ -0,0 +1,16 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future | FileCheck %s --check-prefix=FUTURE +; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 | FileCheck %s --check-prefix=PWR9 + +define void @test(i16 %p1, i16 %p2, <4 x i16> %p3, <4 x i16> %p4) { + %i1 = add i16 %p1, %p2 + %v1 = add <4 x i16> %p3, %p4 + ret void + ; FUTURE: cost of 1 {{.*}} add + ; FUTURE: cost of 1 {{.*}} add + + ; PWR9: cost of 1 {{.*}} add + ; PWR9: cost of 2 {{.*}} add +} + From 40963b2bf0e72626d21917a08f3c86e56541ae7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= Date: Wed, 27 Nov 2019 22:44:43 +0100 Subject: [PATCH 141/591] Revert "[Attributor] Move pass after InstCombine to futher eliminate null pointer checks" This reverts commit 7ca7d62c6ea1680ec0a1861083669596547fdd6f. Commited accidentally. 
--- .../lib/Transforms/IPO/PassManagerBuilder.cpp | 5 +- .../PhaseOrdering/null-check-elim.ll | 47 ------------------- 2 files changed, 3 insertions(+), 49 deletions(-) delete mode 100644 llvm/test/Transforms/PhaseOrdering/null-check-elim.ll diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index db25e617237b8..81424229c3bfe 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -530,6 +530,9 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createIPSCCPPass()); // IP SCCP MPM.add(createCalledValuePropagationPass()); + // Infer attributes on declarations, call sites, arguments, etc. + MPM.add(createAttributorLegacyPass()); + MPM.add(createGlobalOptimizerPass()); // Optimize out global vars // Promote any localized global vars. MPM.add(createPromoteMemoryToRegisterPass()); @@ -537,8 +540,6 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createDeadArgEliminationPass()); // Dead argument elimination addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE - // Infer attributes on declarations, call sites, arguments, etc. 
- MPM.add(createAttributorLegacyPass()); addExtensionsToPM(EP_Peephole, MPM); MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE diff --git a/llvm/test/Transforms/PhaseOrdering/null-check-elim.ll b/llvm/test/Transforms/PhaseOrdering/null-check-elim.ll deleted file mode 100644 index 4c0604512d1f4..0000000000000 --- a/llvm/test/Transforms/PhaseOrdering/null-check-elim.ll +++ /dev/null @@ -1,47 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -O3 -attributor-disable=false -S < %s | FileCheck %s --check-prefixes=ANY,OLDPM -; RUN: opt -passes='default' -attributor-disable=false -S < %s | FileCheck %s --check-prefixes=ANY,NEWPM - -@p = external global [2 x i8*], align 16 - -define void @test(i8* %arg, i32 %arg1) { -; OLDPM-LABEL: @test( -; OLDPM-NEXT: bb5: -; OLDPM-NEXT: [[TMP:%.*]] = tail call i8* @strchr(i8* nofree nonnull dereferenceable(1) [[ARG:%.*]], i32 [[ARG1:%.*]]) #1 -; OLDPM-NEXT: store i8* [[TMP]], i8** getelementptr inbounds ([2 x i8*], [2 x i8*]* @p, i64 0, i64 0), align 16 -; OLDPM-NEXT: [[TMP4:%.*]] = tail call i8* @foo(i8* nonnull [[ARG]]) -; OLDPM-NEXT: store i8* [[TMP4]], i8** getelementptr inbounds ([2 x i8*], [2 x i8*]* @p, i64 0, i64 1), align 8 -; OLDPM-NEXT: ret void -; -; NEWPM-LABEL: @test( -; NEWPM-NEXT: bb: -; NEWPM-NEXT: [[TMP:%.*]] = tail call i8* @strchr(i8* nonnull dereferenceable(1) [[ARG:%.*]], i32 [[ARG1:%.*]]) -; NEWPM-NEXT: store i8* [[TMP]], i8** getelementptr inbounds ([2 x i8*], [2 x i8*]* @p, i64 0, i64 0), align 16 -; NEWPM-NEXT: [[TMP2:%.*]] = icmp eq i8* [[ARG]], null -; NEWPM-NEXT: br i1 [[TMP2]], label [[BB5:%.*]], label [[BB3:%.*]] -; NEWPM: bb3: -; NEWPM-NEXT: [[TMP4:%.*]] = tail call i8* @foo(i8* nonnull [[ARG]]) -; NEWPM-NEXT: br label [[BB5]] -; NEWPM: bb5: -; NEWPM-NEXT: [[TMP6:%.*]] = phi i8* [ [[TMP4]], [[BB3]] ], [ null, [[BB:%.*]] ] -; NEWPM-NEXT: store i8* [[TMP6]], i8** getelementptr inbounds ([2 x i8*], [2 x i8*]* @p, i64 0, i64 1), align 8 -; 
NEWPM-NEXT: ret void -; -bb: - %tmp = tail call i8* @strchr(i8* %arg, i32 %arg1) - store i8* %tmp, i8** getelementptr inbounds ([2 x i8*], [2 x i8*]* @p, i64 0, i64 0), align 16 - %tmp2 = icmp eq i8* %arg, null - br i1 %tmp2, label %bb5, label %bb3 - -bb3: ; preds = %bb - %tmp4 = tail call i8* @foo(i8* %arg) - br label %bb5 - -bb5: ; preds = %bb3, %bb - %tmp6 = phi i8* [ %tmp4, %bb3 ], [ null, %bb ] - store i8* %tmp6, i8** getelementptr inbounds ([2 x i8*], [2 x i8*]* @p, i64 0, i64 1), align 8 - ret void -} - -declare i8* @strchr(i8*, i32) -declare i8* @foo(i8*) From 98740643f794254342e22d106eafb73cf459f465 Mon Sep 17 00:00:00 2001 From: David Tenty Date: Wed, 27 Nov 2019 17:17:02 -0500 Subject: [PATCH 142/591] [AIX] Emit TOC entries for ASM printing Summary: Emit the correct .toc psuedo op when we change to the TOC and emit TC entries. Make sure TOC psuedos get the right symbols via overriding getMCSymbolForTOCPseudoMO on AIX. Add a test for TOC assembly writing and update tests to include TOC entries. Also make sure external globals have a csect set and handle external function descriptor (originally authored by Jason Liu) so we can emit TOC entries for them. 
Reviewers: DiggerLin, sfertile, Xiangling_L, jasonliu, hubert.reinterpretcast Reviewed By: jasonliu Subscribers: arphaman, wuzish, nemanjai, hiraditya, kbarton, jsji, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70461 --- llvm/include/llvm/MC/MCAsmInfo.h | 6 + llvm/include/llvm/MC/MCSymbolXCOFF.h | 12 ++ llvm/lib/BinaryFormat/XCOFF.cpp | 4 + llvm/lib/MC/MCSectionXCOFF.cpp | 2 + .../PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 1 + .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 8 +- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 130 +++++++++++++++--- .../PowerPC/aix-lower-block-address.ll | 2 +- .../PowerPC/aix-lower-constant-pool-index.ll | 2 +- .../CodeGen/PowerPC/aix-lower-jump-table.ll | 2 +- .../PowerPC/aix-xcoff-data-only-notoc.ll | 12 ++ llvm/test/CodeGen/PowerPC/aix-xcoff-toc.ll | 48 +++++++ .../PowerPC/lower-globaladdr32-aix-asm.ll | 4 +- .../PowerPC/lower-globaladdr64-aix-asm.ll | 4 +- 14 files changed, 208 insertions(+), 29 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/aix-xcoff-data-only-notoc.ll create mode 100644 llvm/test/CodeGen/PowerPC/aix-xcoff-toc.ll diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h index 589f1dfe90b81..5a6dff64caef7 100644 --- a/llvm/include/llvm/MC/MCAsmInfo.h +++ b/llvm/include/llvm/MC/MCAsmInfo.h @@ -333,6 +333,10 @@ class MCAsmInfo { /// protected visibility. Defaults to MCSA_Protected MCSymbolAttr ProtectedVisibilityAttr = MCSA_Protected; + // This attribute is used to indicate symbols such as commons on AIX may have + // a storage mapping class embedded in the name. + bool SymbolsHaveSMC = false; + //===--- Dwarf Emission Directives -----------------------------------===// /// True if target supports emission of debugging information. 
Defaults to @@ -587,6 +591,8 @@ class MCAsmInfo { return ProtectedVisibilityAttr; } + bool getSymbolsHaveSMC() const { return SymbolsHaveSMC; } + bool doesSupportDebugInformation() const { return SupportsDebugInformation; } bool doesSupportExceptionHandling() const { diff --git a/llvm/include/llvm/MC/MCSymbolXCOFF.h b/llvm/include/llvm/MC/MCSymbolXCOFF.h index 8bc7817404392..07dfb5d299776 100644 --- a/llvm/include/llvm/MC/MCSymbolXCOFF.h +++ b/llvm/include/llvm/MC/MCSymbolXCOFF.h @@ -9,6 +9,7 @@ #define LLVM_MC_MCSYMBOLXCOFF_H #include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/XCOFF.h" #include "llvm/MC/MCSymbol.h" @@ -50,6 +51,17 @@ class MCSymbolXCOFF : public MCSymbol { bool hasContainingCsect() const { return ContainingCsect != nullptr; } + StringRef getUnqualifiedName() const { + const StringRef name = getName(); + if (name.back() == ']') { + StringRef lhs, rhs; + std::tie(lhs, rhs) = name.rsplit('['); + assert(!rhs.empty() && "Invalid SMC format in XCOFF symbol."); + return lhs; + } + return name; + } + private: Optional StorageClass; MCSectionXCOFF *ContainingCsect = nullptr; diff --git a/llvm/lib/BinaryFormat/XCOFF.cpp b/llvm/lib/BinaryFormat/XCOFF.cpp index 001b8077cd3d1..29ccbaea3584d 100644 --- a/llvm/lib/BinaryFormat/XCOFF.cpp +++ b/llvm/lib/BinaryFormat/XCOFF.cpp @@ -24,6 +24,10 @@ StringRef XCOFF::getMappingClassString(XCOFF::StorageMappingClass SMC) { return "BS"; case XCOFF::XMC_RO: return "RO"; + case XCOFF::XMC_UA: + return "UA"; + case XCOFF::XMC_TC: + return "TC"; default: report_fatal_error("Unhandled storage-mapping class."); } diff --git a/llvm/lib/MC/MCSectionXCOFF.cpp b/llvm/lib/MC/MCSectionXCOFF.cpp index f646168d3a4a7..8377e295532ae 100644 --- a/llvm/lib/MC/MCSectionXCOFF.cpp +++ b/llvm/lib/MC/MCSectionXCOFF.cpp @@ -40,6 +40,8 @@ void MCSectionXCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, case XCOFF::XMC_DS: OS << "\t.csect " << QualName->getName() << '\n'; break; + case 
XCOFF::XMC_TC: + break; case XCOFF::XMC_TC0: OS << "\t.toc\n"; break; diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index 1216cd7272893..a61c34ca6f14b 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -87,4 +87,5 @@ PPCXCOFFMCAsmInfo::PPCXCOFFMCAsmInfo(bool Is64Bit, const Triple &T) { assert(!IsLittleEndian && "Little-endian XCOFF not supported."); CodePointerSize = CalleeSaveStackSlotSize = Is64Bit ? 8 : 4; ZeroDirective = "\t.space\t"; + SymbolsHaveSMC = true; } diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index a9717bfc3082a..3cc1f40231660 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -30,6 +30,7 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCSymbolXCOFF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" @@ -108,8 +109,11 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer { : PPCTargetStreamer(S), OS(OS) {} void emitTCEntry(const MCSymbol &S) override { + const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo(); OS << "\t.tc "; - OS << S.getName(); + OS << (MAI->getSymbolsHaveSMC() + ? cast(S).getUnqualifiedName() + : S.getName()); OS << "[TC],"; OS << S.getName(); OS << '\n'; @@ -243,7 +247,7 @@ class PPCTargetXCOFFStreamer : public PPCTargetStreamer { PPCTargetXCOFFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {} void emitTCEntry(const MCSymbol &S) override { - report_fatal_error("TOC entries not supported yet."); + // Object writing TOC entries not supported yet. 
} void emitMachine(StringRef CPU) override { diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 33198efb05f8d..08e305d69e7e1 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -43,6 +43,7 @@ #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -82,6 +83,8 @@ class PPCAsmPrinter : public AsmPrinter { const PPCSubtarget *Subtarget = nullptr; StackMaps SM; + virtual MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO); + public: explicit PPCAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) @@ -161,6 +164,11 @@ class PPCDarwinAsmPrinter : public PPCAsmPrinter { }; class PPCAIXAsmPrinter : public PPCAsmPrinter { +private: + static void ValidateGV(const GlobalVariable *GV); +protected: + MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO) override; + public: PPCAIXAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) : PPCAsmPrinter(TM, std::move(Streamer)) {} @@ -514,17 +522,16 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI, /// Map a machine operand for a TOC pseudo-machine instruction to its /// corresponding MCSymbol. 
-static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO, - AsmPrinter &AP) { +MCSymbol *PPCAsmPrinter::getMCSymbolForTOCPseudoMO(const MachineOperand &MO) { switch (MO.getType()) { case MachineOperand::MO_GlobalAddress: - return AP.getSymbol(MO.getGlobal()); + return getSymbol(MO.getGlobal()); case MachineOperand::MO_ConstantPoolIndex: - return AP.GetCPISymbol(MO.getIndex()); + return GetCPISymbol(MO.getIndex()); case MachineOperand::MO_JumpTableIndex: - return AP.GetJTISymbol(MO.getIndex()); + return GetJTISymbol(MO.getIndex()); case MachineOperand::MO_BlockAddress: - return AP.GetBlockAddressSymbol(MO.getBlockAddress()); + return GetBlockAddressSymbol(MO.getBlockAddress()); default: llvm_unreachable("Unexpected operand type to get symbol."); } @@ -688,7 +695,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { "Invalid operand for LWZtoc."); // Map the operand to its corresponding MCSymbol. - const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO); // Create a reference to the GOT entry for the symbol. The GOT entry will be // synthesized later. @@ -749,7 +756,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // global address operand to be a reference to the TOC entry we will // synthesize later. MCSymbol *TOCEntry = - lookUpOrCreateTOCEntry(getMCSymbolForTOCPseudoMO(MO, *this)); + lookUpOrCreateTOCEntry(getMCSymbolForTOCPseudoMO(MO)); const MCSymbolRefExpr::VariantKind VK = IsAIX ? MCSymbolRefExpr::VK_None : MCSymbolRefExpr::VK_PPC_TOC; @@ -775,7 +782,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { "Invalid operand for ADDIStocHA."); // Map the machine operand to its corresponding MCSymbol. - MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO); // Always use TOC on AIX. 
Map the global address operand to be a reference // to the TOC entry we will synthesize later. 'TOCEntry' is a label used to @@ -805,7 +812,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { "Invalid operand for LWZtocL."); // Map the machine operand to its corresponding MCSymbol. - MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO); // Always use TOC on AIX. Map the global address operand to be a reference // to the TOC entry we will synthesize later. 'TOCEntry' is a label used to @@ -835,7 +842,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) && "Invalid operand for ADDIStocHA8!"); - const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO); const bool GlobalToc = MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal()); @@ -881,7 +888,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { "LDtocL used on symbol that could be accessed directly is " "invalid. 
Must match ADDIStocHA8.")); - const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); + const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO); if (!MO.isCPI() || TM.getCodeModel() == CodeModel::Large) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); @@ -911,7 +918,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { "Interposable definitions must use indirect access.")); const MCExpr *Exp = - MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO, *this), + MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO), MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext); TmpInst.getOperand(2) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); @@ -1736,7 +1743,7 @@ void PPCAIXAsmPrinter::SetupMachineFunction(MachineFunction &MF) { return AsmPrinter::SetupMachineFunction(MF); } -void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { +void PPCAIXAsmPrinter::ValidateGV(const GlobalVariable *GV) { // Early error checking limiting what is supported. if (GV->isThreadLocal()) report_fatal_error("Thread local not yet supported on AIX."); @@ -1746,6 +1753,19 @@ void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (GV->hasComdat()) report_fatal_error("COMDAT not yet supported by AIX."); +} + +void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { + ValidateGV(GV); + + // External global variables are already handled. + if (!GV->hasInitializer()) + return; + + // Create the symbol, set its storage class. 
+ MCSymbolXCOFF *GVSym = cast(getSymbol(GV)); + GVSym->setStorageClass( + TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV)); SectionKind GVKind = getObjFileLowering().getKindForGlobal(GV, TM); if ((!GVKind.isCommon() && !GVKind.isBSS() && !GVKind.isData() && @@ -1759,11 +1779,6 @@ void PPCAIXAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { MCSectionXCOFF *Csect = cast( getObjFileLowering().SectionForGlobal(GV, GVKind, TM)); OutStreamer->SwitchSection(Csect); - - // Create the symbol, set its storage class, and emit it. - MCSymbolXCOFF *GVSym = cast(getSymbol(GV)); - GVSym->setStorageClass( - TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV)); GVSym->setContainingCsect(Csect); const DataLayout &DL = GV->getParent()->getDataLayout(); @@ -1802,7 +1817,10 @@ void PPCAIXAsmPrinter::EmitFunctionDescriptor() { OutStreamer->EmitValue(MCSymbolRefExpr::create(CurrentFnSym, OutContext), PointerSize); // Emit TOC base address. - MCSymbol *TOCBaseSym = OutContext.getOrCreateSymbol(StringRef("TOC[TC0]")); + const MCSectionXCOFF *TOCBaseSec = OutStreamer->getContext().getXCOFFSection( + StringRef("TOC"), XCOFF::XMC_TC0, XCOFF::XTY_SD, XCOFF::C_HIDEXT, + SectionKind::getData()); + const MCSymbol *TOCBaseSym = TOCBaseSec->getQualNameSymbol(); OutStreamer->EmitValue(MCSymbolRefExpr::create(TOCBaseSym, OutContext), PointerSize); // Emit a null environment pointer. @@ -1823,8 +1841,80 @@ void PPCAIXAsmPrinter::EmitEndOfAsmFile(Module &M) { SectionKind::getData()); // Switch to section to emit TOC base. OutStreamer->SwitchSection(TOCBaseSection); + + PPCTargetStreamer &TS = + static_cast(*OutStreamer->getTargetStreamer()); + + for (auto &I : TOC) { + // Setup the csect for the current TC entry. 
+ MCSectionXCOFF *TCEntry = OutStreamer->getContext().getXCOFFSection( + cast(I.first)->getUnqualifiedName(), XCOFF::XMC_TC, + XCOFF::XTY_SD, XCOFF::C_HIDEXT, SectionKind::getData()); + cast(I.second)->setContainingCsect(TCEntry); + OutStreamer->SwitchSection(TCEntry); + + OutStreamer->EmitLabel(I.second); + TS.emitTCEntry(*I.first); + } } +MCSymbol * +PPCAIXAsmPrinter::getMCSymbolForTOCPseudoMO(const MachineOperand &MO) { + const GlobalObject *GO = nullptr; + + // If the MO is a function or certain kind of globals, we want to make sure to + // refer to the csect symbol, otherwise we can just do the default handling. + if (MO.getType() != MachineOperand::MO_GlobalAddress || + !(GO = dyn_cast(MO.getGlobal()))) + return PPCAsmPrinter::getMCSymbolForTOCPseudoMO(MO); + + // Do an early error check for globals we don't support. This will go away + // eventually. + const auto *GV = dyn_cast(GO); + if (GV) { + ValidateGV(GV); + } + + MCSymbolXCOFF *XSym = cast(getSymbol(GO)); + + // If the global object is a global variable without initializer or is a + // declaration of a function, then XSym is an external referenced symbol. + // Hence we may need to explictly create a MCSectionXCOFF for it so that we + // can return its symbol later. + if (GO->isDeclaration() && !XSym->hasContainingCsect()) { + // Make sure the storage class is set. + const XCOFF::StorageClass SC = + TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO); + XSym->setStorageClass(SC); + + MCSectionXCOFF *Csect = OutStreamer->getContext().getXCOFFSection( + XSym->getName(), isa(GO) ? XCOFF::XMC_DS : XCOFF::XMC_UA, + XCOFF::XTY_ER, SC, SectionKind::getMetadata()); + XSym->setContainingCsect(Csect); + + return Csect->getQualNameSymbol(); + } + + // Handle initialized global variables. + if (GV) { + SectionKind GVKind = getObjFileLowering().getKindForGlobal(GV, TM); + + // If the operand is a common then we should refer to the csect symbol. 
+ if (GVKind.isCommon() || GVKind.isBSSLocal()) { + MCSectionXCOFF *Csect = cast( + getObjFileLowering().SectionForGlobal(GV, GVKind, TM)); + return Csect->getQualNameSymbol(); + } + + // Other global variables are refered to by labels inside of a single csect, + // so refer to the label directly. + return getSymbol(GV); + } + + // If the MO is a function, we want to make sure to refer to the function + // descriptor csect. + return XSym->getContainingCsect()->getQualNameSymbol(); +} /// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code /// for a MachineFunction to the given output stream, in a format that the diff --git a/llvm/test/CodeGen/PowerPC/aix-lower-block-address.ll b/llvm/test/CodeGen/PowerPC/aix-lower-block-address.ll index 2d6353876a331..b4b9f029ed0bb 100644 --- a/llvm/test/CodeGen/PowerPC/aix-lower-block-address.ll +++ b/llvm/test/CodeGen/PowerPC/aix-lower-block-address.ll @@ -69,4 +69,4 @@ __here: ; 64LARGE-ASM: ld [[REG2:[0-9]+]], LC0@l([[REG1]]) ; CHECK: .toc -; CHECK-NOT: .tc +; CHECK: .tc Ltmp0[TC],Ltmp0 diff --git a/llvm/test/CodeGen/PowerPC/aix-lower-constant-pool-index.ll b/llvm/test/CodeGen/PowerPC/aix-lower-constant-pool-index.ll index 8803a1e4569fb..1db8a55fb28e5 100644 --- a/llvm/test/CodeGen/PowerPC/aix-lower-constant-pool-index.ll +++ b/llvm/test/CodeGen/PowerPC/aix-lower-constant-pool-index.ll @@ -84,4 +84,4 @@ entry: ; 64LARGE-ASM: blr ; CHECK: .toc -; CHECK-NOT: .tc +; CHECK: .tc .LCPI0_0[TC],.LCPI0_0 diff --git a/llvm/test/CodeGen/PowerPC/aix-lower-jump-table.ll b/llvm/test/CodeGen/PowerPC/aix-lower-jump-table.ll index 5efb956b1529d..a5ec1942a3157 100644 --- a/llvm/test/CodeGen/PowerPC/aix-lower-jump-table.ll +++ b/llvm/test/CodeGen/PowerPC/aix-lower-jump-table.ll @@ -185,4 +185,4 @@ ; 64LARGE-ASM: .long LBB0_5-.LJTI0_0 ; CHECK: .toc -; CHECK-NOT: .tc +; CHECK: .tc .LJTI0_0[TC],.LJTI0_0 diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-data-only-notoc.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-data-only-notoc.ll 
new file mode 100644 index 0000000000000..bc23b29157481 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-data-only-notoc.ll @@ -0,0 +1,12 @@ +; RUN: llc -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 | FileCheck %s + +@a = external global i32, align 4 +@b = external global i64, align 8 +@c = external global i16, align 2 +@globa = common global i32 0, align 4 + +@ptr = internal global void (...)* null, align 4 + +; CHECK-NOT: .toc + diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-toc.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-toc.ll new file mode 100644 index 0000000000000..4d6d7dc5bb42b --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-toc.ll @@ -0,0 +1,48 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck --check-prefixes CHECK,CHECK32 %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 | FileCheck --check-prefixes CHECK,CHECK64 %s + +@a = external global i32, align 4 +@b = external global i64, align 8 +@c = external global i16, align 2 +@globa = common global i32 0, align 4 + +@ptr = internal global void (...)* null, align 4 + +declare void @foo() + +define void @bar() { + %1 = alloca i8*, align 8 + store i32 0, i32* @a, align 4 + store i64 0, i64* @b, align 8 + store i16 0, i16* @c, align 2 + store i32 0, i32* @globa, align 4 + store void (...)* bitcast (void ()* @bar to void (...)*), void (...)** @ptr, align 4 + store i8* bitcast (void ()* @foo to i8*), i8** %1, align 8 + ret void +} + +; CHECK-NOT: .comm a +; CHECK-NOT: .lcomm a +; CHECK-NOT: .comm b +; CHECK-NOT: .lcomm b +; CHECK-NOT: .comm c +; CHECK-NOT: .lcomm c +; CHECK: .comm globa[RW],4,2 +; CHECK32: .lcomm ptr,4,ptr[BS],2 +; CHECK64: .lcomm ptr,8,ptr[BS],2 +; CHECK: .toc +; CHECK-NEXT: LC0: +; CHECK-NEXT: .tc a[TC],a[UA] +; CHECK-NEXT: LC1: +; CHECK-NEXT: .tc b[TC],b[UA] +; CHECK-NEXT: LC2: +; CHECK-NEXT: .tc c[TC],c[UA] +; CHECK-NEXT: LC3: +; 
CHECK-NEXT: .tc globa[TC],globa[RW] +; CHECK-NEXT: LC4: +; CHECK-NEXT: .tc ptr[TC],ptr[BS] +; CHECK-NEXT: LC5: +; CHECK-NEXT: .tc bar[TC],bar[DS] +; CHECK-NEXT: LC6: +; CHECK-NEXT: .tc foo[TC],foo[DS] + diff --git a/llvm/test/CodeGen/PowerPC/lower-globaladdr32-aix-asm.ll b/llvm/test/CodeGen/PowerPC/lower-globaladdr32-aix-asm.ll index e48f43a2d4b32..e3254175dbe96 100644 --- a/llvm/test/CodeGen/PowerPC/lower-globaladdr32-aix-asm.ll +++ b/llvm/test/CodeGen/PowerPC/lower-globaladdr32-aix-asm.ll @@ -41,5 +41,5 @@ define void @test_store(i32 %0) { ; LARGE: stw [[REG3:[0-9]+]], 0([[REG2]]) ; LARGE: blr -; TODO Update test when TOC-entry emission lands. -; CHECK-NOT: .tc +; CHECK: .tc a[TC],a +; CHECK: .tc b[TC],b diff --git a/llvm/test/CodeGen/PowerPC/lower-globaladdr64-aix-asm.ll b/llvm/test/CodeGen/PowerPC/lower-globaladdr64-aix-asm.ll index 371fa0ec279e3..6d1863bc95371 100644 --- a/llvm/test/CodeGen/PowerPC/lower-globaladdr64-aix-asm.ll +++ b/llvm/test/CodeGen/PowerPC/lower-globaladdr64-aix-asm.ll @@ -41,5 +41,5 @@ define void @test_store(i32 zeroext %0) { ; LARGE: stw [[REG3:[0-9]+]], 0([[REG2]]) ; LARGE: blr -; TODO Update test when TOC-entry emission lands. -; CHECK-NOT: .tc +; CHECK: .tc a[TC],a +; CHECK: .tc b[TC],b From 8f28f26860b960295c64901ad3adcb549290da88 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 27 Nov 2019 15:11:43 -0800 Subject: [PATCH 143/591] [X86] Add SSEPackedSingle/Double execution domain to COMI/UCOMI SSE/AVX instructions. 
--- llvm/lib/Target/X86/X86InstrAVX512.td | 27 ++++++++++--------- llvm/lib/Target/X86/X86InstrSSE.td | 39 ++++++++++++++------------- 2 files changed, 34 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 5051d5453f3ac..637102e47fd36 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -8664,7 +8664,8 @@ let Predicates = [HasVLX] in { // Unordered/Ordered scalar fp compare with Sae and set EFLAGS multiclass avx512_ord_cmp_sae opc, X86VectorVTInfo _, - string OpcodeStr, X86FoldableSchedWrite sched> { + string OpcodeStr, Domain d, + X86FoldableSchedWrite sched = WriteFCom> { let hasSideEffects = 0 in def rrb: AVX512, @@ -8672,44 +8673,44 @@ multiclass avx512_ord_cmp_sae opc, X86VectorVTInfo _, } let Defs = [EFLAGS], Predicates = [HasAVX512] in { - defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>, + defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>, AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; - defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>, + defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>, AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; - defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>, + defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>, AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; - defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>, + defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>, AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; } let Defs = [EFLAGS], Predicates = [HasAVX512] in { defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32, - "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG, + "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, 
X86cmp, f64, f64mem, loadf64, - "ucomisd", WriteFCom>, PD, EVEX, + "ucomisd", SSEPackedDouble>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; let Pattern = [] in { defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32, - "comiss", WriteFCom>, PS, EVEX, VEX_LIG, + "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64, - "comisd", WriteFCom>, PD, EVEX, + "comisd", SSEPackedDouble>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } let isCodeGenOnly = 1 in { defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem, - sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG, + sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem, - sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX, + sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem, - sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG, + sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem, - sse_load_f64, "comisd", WriteFCom>, PD, EVEX, + sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } } diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index ffdcb65c93bd9..7633c3f7709fa 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -1815,8 +1815,8 @@ let Constraints = "$src1 = $dst" in { // sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS multiclass sse12_ord_cmp opc, RegisterClass RC, SDNode OpNode, ValueType vt, X86MemOperand x86memop, - PatFrag ld_frag, string OpcodeStr, - X86FoldableSchedWrite sched> { + PatFrag ld_frag, string OpcodeStr, Domain d, + 
X86FoldableSchedWrite sched = WriteFCom> { let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in { def rr: SI opc, RegisterClass RC, SDNode OpNode, ValueType vt, Operand memop, ComplexPattern mem_cpat, string OpcodeStr, - X86FoldableSchedWrite sched> { + Domain d, + X86FoldableSchedWrite sched = WriteFCom> { let Uses = [MXCSR], mayRaiseFPException = 1 in { def rr_Int: SI, PS, VEX, VEX_LIG, VEX_WIG; + "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG; defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, - "ucomisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG; + "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG; let Pattern = [] in { defm VCOMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32, - "comiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG; + "comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG; defm VCOMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64, - "comisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG; + "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG; } let isCodeGenOnly = 1 in { defm VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, - sse_load_f32, "ucomiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG; + sse_load_f32, "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG; defm VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, - sse_load_f64, "ucomisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG; + sse_load_f64, "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG; defm VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, - sse_load_f32, "comiss", WriteFCom>, PS, VEX, VEX_LIG, VEX_WIG; + sse_load_f32, "comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG; defm VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, - sse_load_f64, "comisd", WriteFCom>, PD, VEX, VEX_LIG, VEX_WIG; + sse_load_f64, "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG; } defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, - "ucomiss", WriteFCom>, PS; + 
"ucomiss", SSEPackedSingle>, PS; defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, - "ucomisd", WriteFCom>, PD; + "ucomisd", SSEPackedDouble>, PD; let Pattern = [] in { defm COMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32, - "comiss", WriteFCom>, PS; + "comiss", SSEPackedSingle>, PS; defm COMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64, - "comisd", WriteFCom>, PD; + "comisd", SSEPackedDouble>, PD; } let isCodeGenOnly = 1 in { defm UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, - sse_load_f32, "ucomiss", WriteFCom>, PS; + sse_load_f32, "ucomiss", SSEPackedSingle>, PS; defm UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, - sse_load_f64, "ucomisd", WriteFCom>, PD; + sse_load_f64, "ucomisd", SSEPackedDouble>, PD; defm COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, - sse_load_f32, "comiss", WriteFCom>, PS; + sse_load_f32, "comiss", SSEPackedSingle>, PS; defm COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, - sse_load_f64, "comisd", WriteFCom>, PD; + sse_load_f64, "comisd", SSEPackedDouble>, PD; } } // Defs = [EFLAGS] From 5272d2a3a43b21dadb61a8320c14df94db89acc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= Date: Thu, 28 Nov 2019 00:49:26 +0100 Subject: [PATCH 144/591] [ADT] Fixed -Wdeprecated-copy warning. NFCI --- llvm/unittests/ADT/TestGraph.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/unittests/ADT/TestGraph.h b/llvm/unittests/ADT/TestGraph.h index 36d298255c1b7..3e6d4e14d5c11 100644 --- a/llvm/unittests/ADT/TestGraph.h +++ b/llvm/unittests/ADT/TestGraph.h @@ -175,8 +175,8 @@ class Graph { public: /// ChildIterator - Copy constructor. - ChildIterator(const ChildIterator& other) : FirstNode(other.FirstNode), - Children(other.Children) {} + ChildIterator(const ChildIterator &other) = default; + ChildIterator &operator=(const ChildIterator &other) = default; /// Comparison operators. 
bool operator==(const ChildIterator &other) const { From 1ac700cdef787383ad49a0e37d9894491ef19480 Mon Sep 17 00:00:00 2001 From: Johannes Altmanninger Date: Fri, 15 Nov 2019 02:12:58 +0100 Subject: [PATCH 145/591] [CodeGen] Fix clang crash on aggregate initialization of array of labels Summary: Fix PR43700 The ConstantEmitter in AggExprEmitter::EmitArrayInit was initialized with the CodeGenFunction set to null, which caused the crash. Also simplify another call, and make the CGF member a const pointer since it is public but only assigned in the constructor. Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D70302 --- clang/lib/CodeGen/CGExprAgg.cpp | 2 +- clang/lib/CodeGen/CGExprScalar.cpp | 4 ++-- clang/lib/CodeGen/ConstantEmitter.h | 2 +- clang/test/CodeGen/label-array-aggregate-init.c | 6 ++++++ 4 files changed, 10 insertions(+), 4 deletions(-) create mode 100644 clang/test/CodeGen/label-array-aggregate-init.c diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index 7e69f63fe1354..ecb5253c07ec3 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -493,7 +493,7 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, if (NumInitElements * elementSize.getQuantity() > 16 && elementType.isTriviallyCopyableType(CGF.getContext())) { CodeGen::CodeGenModule &CGM = CGF.CGM; - ConstantEmitter Emitter(CGM); + ConstantEmitter Emitter(CGF); LangAS AS = ArrayQTy.getAddressSpace(); if (llvm::Constant *C = Emitter.tryEmitForInitializer(E, AS, ArrayQTy)) { auto GV = new llvm::GlobalVariable( diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index d727e326a27a5..750b5503c08f8 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -644,8 +644,8 @@ class ScalarExprEmitter auto &Ctx = CGF.getContext(); APValue Evaluated = SLE->EvaluateInContext(Ctx, CGF.CurSourceLocExprScope.getDefaultExpr()); - 
return ConstantEmitter(CGF.CGM, &CGF) - .emitAbstract(SLE->getLocation(), Evaluated, SLE->getType()); + return ConstantEmitter(CGF).emitAbstract(SLE->getLocation(), Evaluated, + SLE->getType()); } Value *VisitCXXDefaultArgExpr(CXXDefaultArgExpr *DAE) { diff --git a/clang/lib/CodeGen/ConstantEmitter.h b/clang/lib/CodeGen/ConstantEmitter.h index 59a19730f4ebc..121acbac4fa91 100644 --- a/clang/lib/CodeGen/ConstantEmitter.h +++ b/clang/lib/CodeGen/ConstantEmitter.h @@ -23,7 +23,7 @@ namespace CodeGen { class ConstantEmitter { public: CodeGenModule &CGM; - CodeGenFunction *CGF; + CodeGenFunction *const CGF; private: bool Abstract = false; diff --git a/clang/test/CodeGen/label-array-aggregate-init.c b/clang/test/CodeGen/label-array-aggregate-init.c new file mode 100644 index 0000000000000..6821fd355ec11 --- /dev/null +++ b/clang/test/CodeGen/label-array-aggregate-init.c @@ -0,0 +1,6 @@ +// RUN: %clang -cc1 -emit-llvm %s -o /dev/null + +int main() { +L: + (void)(void *[]){ &&L, 0, 0 }; +} From acc79aa0e747b9777077e0a337e99540a52b94b2 Mon Sep 17 00:00:00 2001 From: Yi Kong Date: Thu, 21 Nov 2019 14:14:33 -0800 Subject: [PATCH 146/591] Revert "Revert 1689ad27af5 "[builtins] Implement rounding mode support for i386/x86_64"" Don't build specilised fp_mode.c on MSVC since it does not support inline ASM on x86_64. This reverts commit a19f0eec94e195cac676d0d473882b48f4fded90. 
--- compiler-rt/lib/builtins/CMakeLists.txt | 7 ++++ compiler-rt/lib/builtins/i386/fp_mode.c | 39 ++++++++++++++++++++ compiler-rt/test/builtins/Unit/addtf3_test.c | 3 +- compiler-rt/test/builtins/Unit/subtf3_test.c | 3 +- 4 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 compiler-rt/lib/builtins/i386/fp_mode.c diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index 1a63aad0e8f66..feacd21d0865b 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -241,6 +241,13 @@ set(x86_ARCH_SOURCES powixf2.c ) +if (NOT MSVC) + set(x86_ARCH_SOURCES + ${x86_ARCH_SOURCES} + i386/fp_mode.c + ) +endif () + if (NOT MSVC) set(x86_64_SOURCES ${GENERIC_TF_SOURCES} diff --git a/compiler-rt/lib/builtins/i386/fp_mode.c b/compiler-rt/lib/builtins/i386/fp_mode.c new file mode 100644 index 0000000000000..62ab771222c09 --- /dev/null +++ b/compiler-rt/lib/builtins/i386/fp_mode.c @@ -0,0 +1,39 @@ +//===----- lib/i386/fp_mode.c - Floaing-point mode utilities -----*- C -*-====// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../fp_mode.h" + +#define X87_TONEAREST 0x0000 +#define X87_DOWNWARD 0x0400 +#define X87_UPWARD 0x0800 +#define X87_TOWARDZERO 0x0c00 +#define X87_RMODE_MASK (X87_TONEAREST | X87_UPWARD | X87_DOWNWARD | X87_TOWARDZERO) + +FE_ROUND_MODE __fe_getround() { + // Assume that the rounding mode state for the fpu agrees with the SSE unit. 
+ unsigned short cw; + __asm__ __volatile__ ("fnstcw %0" : "=m" (cw)); + + switch (cw & X87_RMODE_MASK) { + case X87_TONEAREST: + return FE_TONEAREST; + case X87_DOWNWARD: + return FE_DOWNWARD; + case X87_UPWARD: + return FE_UPWARD; + case X87_TOWARDZERO: + return FE_TOWARDZERO; + } + return FE_TONEAREST; +} + +int __fe_raise_inexact() { + float f = 1.0f, g = 3.0f; + __asm__ __volatile__ ("fdivs %1" : "+t" (f) : "m" (g)); + return 0; +} diff --git a/compiler-rt/test/builtins/Unit/addtf3_test.c b/compiler-rt/test/builtins/Unit/addtf3_test.c index 7ca0355e42adf..dcd4efe9c9015 100644 --- a/compiler-rt/test/builtins/Unit/addtf3_test.c +++ b/compiler-rt/test/builtins/Unit/addtf3_test.c @@ -76,7 +76,8 @@ int main() UINT64_C(0x61e58dd6c51eb77c))) return 1; -#if (defined(__arm__) || defined(__aarch64__)) && defined(__ARM_FP) +#if (defined(__arm__) || defined(__aarch64__)) && defined(__ARM_FP) || \ + defined(i386) || defined(__x86_64__) // Rounding mode tests on supported architectures const long double m = 1234.0L, n = 0.01L; diff --git a/compiler-rt/test/builtins/Unit/subtf3_test.c b/compiler-rt/test/builtins/Unit/subtf3_test.c index b95f2ef996d61..265ab642ecf0c 100644 --- a/compiler-rt/test/builtins/Unit/subtf3_test.c +++ b/compiler-rt/test/builtins/Unit/subtf3_test.c @@ -69,7 +69,8 @@ int main() UINT64_C(0xa44a7bca780a166c))) return 1; -#if (defined(__arm__) || defined(__aarch64__)) && defined(__ARM_FP) +#if (defined(__arm__) || defined(__aarch64__)) && defined(__ARM_FP) || \ + defined(i386) || defined(__x86_64__) // Rounding mode tests on supported architectures const long double m = 1234.02L, n = 0.01L; From 789a7aa37d0cca70d6e48908ce3e8bb4e761e266 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Wed, 27 Nov 2019 17:54:26 -0800 Subject: [PATCH 147/591] Properly disambiguate between array declarators and array subscript expressions. 
--- clang/lib/Parse/ParseTentative.cpp | 14 +++++++++++++- clang/test/Parser/cxx-ambig-decl-expr.cpp | 22 ++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/clang/lib/Parse/ParseTentative.cpp b/clang/lib/Parse/ParseTentative.cpp index e2e16ca63d1eb..9cc41328c469d 100644 --- a/clang/lib/Parse/ParseTentative.cpp +++ b/clang/lib/Parse/ParseTentative.cpp @@ -2066,9 +2066,21 @@ Parser::TPResult Parser::TryParseFunctionDeclarator() { /// Parser::TPResult Parser::TryParseBracketDeclarator() { ConsumeBracket(); - if (!SkipUntil(tok::r_square, StopAtSemi)) + + // A constant-expression cannot begin with a '{', but the + // expr-or-braced-init-list of a postfix-expression can. + if (Tok.is(tok::l_brace)) + return TPResult::False; + + if (!SkipUntil(tok::r_square, tok::comma, StopAtSemi | StopBeforeMatch)) return TPResult::Error; + // If we hit a comma before the ']', this is not a constant-expression, + // but might still be the expr-or-braced-init-list of a postfix-expression. + if (Tok.isNot(tok::r_square)) + return TPResult::False; + + ConsumeBracket(); return TPResult::Ambiguous; } diff --git a/clang/test/Parser/cxx-ambig-decl-expr.cpp b/clang/test/Parser/cxx-ambig-decl-expr.cpp index 6507eafb74cd7..02857e21f7c3e 100644 --- a/clang/test/Parser/cxx-ambig-decl-expr.cpp +++ b/clang/test/Parser/cxx-ambig-decl-expr.cpp @@ -17,3 +17,25 @@ auto (*q)() -> int(*)(unknown); // expected-error {{unknown type name 'unknown'} auto (*r)() -> int(*)(unknown + 1); // expected-error {{undeclared identifier 'unknown'}} int f(unknown const x); // expected-error {{unknown type name 'unknown'}} + +// Disambiguating an array declarator from an array subscripting. +void arr() { + int x[] = {1}; // expected-note 2{{previous}} + + // This is array indexing not an array declarator because a comma expression + // is not syntactically a constant-expression. 
+ int(x[1,1]); // expected-warning 2{{unused}} + + // This is array indexing not an array declaration because a braced-init-list + // is not syntactically a constant-expression. + int(x[{0}]); // expected-error {{array subscript is not an integer}} + struct A { + struct Q { int n; }; + int operator[](Q); + } a; + int(a[{0}]); // expected-warning {{unused}} + + // These are array declarations. + int(x[(1,1)]); // expected-error {{redefinition}} + int(x[true ? 1,1 : 1]); // expected-error {{redefinition}} +} From 601cc29a57d14a229fed9505a3b28e194b6b316f Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 27 Nov 2019 21:12:46 -0500 Subject: [PATCH 148/591] Revert "Revert "gn build: (manually) try to merge 1689ad27af"" This reverts commit 88276ddbfea753ac13da5a64c2020b7b0a06617f. The original change relanded. --- llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn index 53684d4121639..f0602a7d97c35 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn @@ -216,6 +216,7 @@ static_library("builtins") { } if (current_cpu == "x86" || current_cpu == "x64") { + sources -= [ "fp_mode.c" ] sources += [ "cpu_model.c", "divxc3.c", @@ -228,6 +229,7 @@ static_library("builtins") { "floattixf.c", "floatundixf.c", "floatuntixf.c", + "i386/fp_mode.c", "mulxc3.c", "powixf2.c", ] From 8f73a93b2deb77f08822e3d34a7c144687a19c80 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 27 Nov 2019 17:25:26 -0800 Subject: [PATCH 149/591] [X86] Add support for STRICT_FP_TO_UINT/SINT from fp128. 
--- llvm/lib/Target/X86/X86ISelLowering.cpp | 13 ++- llvm/test/CodeGen/X86/fp128-cast-strict.ll | 108 +++++++++++++++++++++ 2 files changed, 117 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 535493a832291..c4ad26374bf0f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -19712,15 +19712,20 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { // fp128 needs to use a libcall. if (SrcVT == MVT::f128) { RTLIB::Libcall LC; - if (Op.getOpcode() == ISD::FP_TO_SINT) + if (IsSigned) LC = RTLIB::getFPTOSINT(SrcVT, VT); else LC = RTLIB::getFPTOUINT(SrcVT, VT); - // FIXME: Strict fp! - assert(!IsStrict && "Unhandled strict operation!"); + SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue(); MakeLibCallOptions CallOptions; - return makeLibCall(DAG, LC, VT, Src, CallOptions, SDLoc(Op)).first; + std::pair Tmp = makeLibCall(DAG, LC, VT, Src, CallOptions, + SDLoc(Op), Chain); + + if (IsStrict) + return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl); + + return Tmp.first; } // Fall back to X87. 
diff --git a/llvm/test/CodeGen/X86/fp128-cast-strict.ll b/llvm/test/CodeGen/X86/fp128-cast-strict.ll index 84964d7719251..99bca70964cb7 100644 --- a/llvm/test/CodeGen/X86/fp128-cast-strict.ll +++ b/llvm/test/CodeGen/X86/fp128-cast-strict.ll @@ -165,6 +165,106 @@ entry: ret void } +define i8 @fptosi_i8(fp128 %x) nounwind strictfp { +; X64-LABEL: fptosi_i8: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: callq __fixtfsi +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq +entry: + %conv = call i8 @llvm.experimental.constrained.fptosi.i8.f128(fp128 %x, metadata !"fpexcept.strict") #0 + ret i8 %conv +} + +define i16 @fptosi_i16(fp128 %x) nounwind strictfp { +; X64-LABEL: fptosi_i16: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: callq __fixtfsi +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq +entry: + %conv = call i16 @llvm.experimental.constrained.fptosi.i16.f128(fp128 %x, metadata !"fpexcept.strict") #0 + ret i16 %conv +} + +define i32 @fptosi_i32(fp128 %x) nounwind strictfp { +; X64-LABEL: fptosi_i32: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: callq __fixtfsi +; X64-NEXT: popq %rcx +; X64-NEXT: retq +entry: + %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +define i64 @fptosi_i64(fp128 %x) nounwind strictfp { +; X64-LABEL: fptosi_i64: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: callq __fixtfdi +; X64-NEXT: popq %rcx +; X64-NEXT: retq +entry: + %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define i8 @fptoui_i8(fp128 %x) nounwind strictfp { +; X64-LABEL: fptoui_i8: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: callq __fixtfsi +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq +entry: + %conv = call 
i8 @llvm.experimental.constrained.fptoui.i8.f128(fp128 %x, metadata !"fpexcept.strict") #0 + ret i8 %conv +} + +define i16 @fptoui_i16(fp128 %x) nounwind strictfp { +; X64-LABEL: fptoui_i16: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: callq __fixtfsi +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: popq %rcx +; X64-NEXT: retq +entry: + %conv = call i16 @llvm.experimental.constrained.fptoui.i16.f128(fp128 %x, metadata !"fpexcept.strict") #0 + ret i16 %conv +} + +define i32 @fptoui_i32(fp128 %x) nounwind strictfp { +; X64-LABEL: fptoui_i32: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: callq __fixunstfsi +; X64-NEXT: popq %rcx +; X64-NEXT: retq +entry: + %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +define i64 @fptoui_i64(fp128 %x) nounwind strictfp { +; X64-LABEL: fptoui_i64: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: callq __fixunstfdi +; X64-NEXT: popq %rcx +; X64-NEXT: retq +entry: + %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + attributes #0 = { strictfp } declare float @llvm.experimental.constrained.fptrunc.f32.f128(fp128, metadata, metadata) @@ -173,3 +273,11 @@ declare x86_fp80 @llvm.experimental.constrained.fptrunc.f80.f128(fp128, metadata declare fp128 @llvm.experimental.constrained.fpext.f128.f32(float, metadata) declare fp128 @llvm.experimental.constrained.fpext.f128.f64(double, metadata) declare fp128 @llvm.experimental.constrained.fpext.f128.f80(x86_fp80, metadata) +declare i8 @llvm.experimental.constrained.fptosi.i8.f128(fp128, metadata) +declare i16 @llvm.experimental.constrained.fptosi.i16.f128(fp128, metadata) +declare i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128, metadata) +declare i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128, metadata) +declare i8 
@llvm.experimental.constrained.fptoui.i8.f128(fp128, metadata) +declare i16 @llvm.experimental.constrained.fptoui.i16.f128(fp128, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128, metadata) From 1727c4f1a2c1fcdd487eee67edf64828076f5399 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 27 Nov 2019 17:44:43 -0800 Subject: [PATCH 150/591] [LegalizeTypes][X86] Add ExpandIntegerResult support for STRICT_FP_TO_SINT/STRICT_FP_TO_UINT. --- .../SelectionDAG/LegalizeIntegerTypes.cpp | 26 ++- llvm/test/CodeGen/X86/fp-intrinsics.ll | 153 ++++++++++++++++++ llvm/test/CodeGen/X86/fp128-cast-strict.ll | 26 +++ 3 files changed, 199 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 9f8da60eb9a6c..dd082646ae5ab 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1698,7 +1698,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::CTTZ_ZERO_UNDEF: case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break; case ISD::FLT_ROUNDS_: ExpandIntRes_FLT_ROUNDS(N, Lo, Hi); break; + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break; + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; case ISD::STRICT_LLROUND: case ISD::STRICT_LLRINT: @@ -2564,7 +2566,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT VT = N->getValueType(0); - SDValue Op = N->getOperand(0); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + SDValue Op = N->getOperand(IsStrict ? 
1 : 0); if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) Op = GetPromotedFloat(Op); @@ -2572,8 +2576,12 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first, - Lo, Hi); + std::pair Tmp = TLI.makeLibCall(DAG, LC, VT, Op, + CallOptions, dl, Chain); + SplitInteger(Tmp.first, Lo, Hi); + + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); } void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, @@ -2581,15 +2589,21 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT VT = N->getValueType(0); - SDValue Op = N->getOperand(0); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) Op = GetPromotedFloat(Op); RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); TargetLowering::MakeLibCallOptions CallOptions; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first, - Lo, Hi); + std::pair Tmp = TLI.makeLibCall(DAG, LC, VT, Op, + CallOptions, dl, Chain); + SplitInteger(Tmp.first, Lo, Hi); + + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); } void DAGTypeLegalizer::ExpandIntRes_LLROUND_LLRINT(SDNode *N, SDValue &Lo, diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll index 58041c29ab64c..011d235c39f62 100644 --- a/llvm/test/CodeGen/X86/fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll @@ -1084,6 +1084,81 @@ entry: ret i64 %result } +; Verify that fptoui(%x) isn't simplified when the rounding mode is +; unknown. 
+; Verify that no gross errors happen. +define i128 @f20s128(double %x) nounwind strictfp { +; X87-LABEL: f20s128: +; X87: # %bb.0: # %entry +; X87-NEXT: pushl %edi +; X87-NEXT: pushl %esi +; X87-NEXT: subl $36, %esp +; X87-NEXT: movl {{[0-9]+}}(%esp), %esi +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fstpl {{[0-9]+}}(%esp) +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl %eax, (%esp) +; X87-NEXT: calll __fixdfti +; X87-NEXT: subl $4, %esp +; X87-NEXT: movl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X87-NEXT: movl {{[0-9]+}}(%esp), %edx +; X87-NEXT: movl {{[0-9]+}}(%esp), %edi +; X87-NEXT: movl %edi, 8(%esi) +; X87-NEXT: movl %edx, 12(%esi) +; X87-NEXT: movl %eax, (%esi) +; X87-NEXT: movl %ecx, 4(%esi) +; X87-NEXT: movl %esi, %eax +; X87-NEXT: addl $36, %esp +; X87-NEXT: popl %esi +; X87-NEXT: popl %edi +; X87-NEXT: retl $4 +; +; X86-SSE-LABEL: f20s128: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: pushl %edi +; X86-SSE-NEXT: pushl %esi +; X86-SSE-NEXT: subl $36, %esp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl %eax, (%esp) +; X86-SSE-NEXT: calll __fixdfti +; X86-SSE-NEXT: subl $4, %esp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-SSE-NEXT: movl %edi, 8(%esi) +; X86-SSE-NEXT: movl %edx, 12(%esi) +; X86-SSE-NEXT: movl %eax, (%esi) +; X86-SSE-NEXT: movl %ecx, 4(%esi) +; X86-SSE-NEXT: movl %esi, %eax +; X86-SSE-NEXT: addl $36, %esp +; X86-SSE-NEXT: popl %esi +; X86-SSE-NEXT: popl %edi +; X86-SSE-NEXT: retl $4 +; +; SSE-LABEL: f20s128: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: callq __fixdfti +; SSE-NEXT: popq %rcx +; SSE-NEXT: retq +; +; AVX-LABEL: f20s128: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax 
+; AVX-NEXT: callq __fixdfti +; AVX-NEXT: popq %rcx +; AVX-NEXT: retq +entry: + %result = call i128 @llvm.experimental.constrained.fptosi.i128.f64(double %x, + metadata !"fpexcept.strict") #0 + ret i128 %result +} + ; Verify that fptoui(%x) isn't simplified when the rounding mode is ; unknown. ; Verify that no gross errors happen. @@ -1348,6 +1423,82 @@ entry: ret i64 %result } + +; Verify that fptoui(%x) isn't simplified when the rounding mode is +; unknown. +; Verify that no gross errors happen. +define i128 @f20u128(double %x) nounwind strictfp { +; X87-LABEL: f20u128: +; X87: # %bb.0: # %entry +; X87-NEXT: pushl %edi +; X87-NEXT: pushl %esi +; X87-NEXT: subl $36, %esp +; X87-NEXT: movl {{[0-9]+}}(%esp), %esi +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: fstpl {{[0-9]+}}(%esp) +; X87-NEXT: leal {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl %eax, (%esp) +; X87-NEXT: calll __fixunsdfti +; X87-NEXT: subl $4, %esp +; X87-NEXT: movl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X87-NEXT: movl {{[0-9]+}}(%esp), %edx +; X87-NEXT: movl {{[0-9]+}}(%esp), %edi +; X87-NEXT: movl %edi, 8(%esi) +; X87-NEXT: movl %edx, 12(%esi) +; X87-NEXT: movl %eax, (%esi) +; X87-NEXT: movl %ecx, 4(%esi) +; X87-NEXT: movl %esi, %eax +; X87-NEXT: addl $36, %esp +; X87-NEXT: popl %esi +; X87-NEXT: popl %edi +; X87-NEXT: retl $4 +; +; X86-SSE-LABEL: f20u128: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: pushl %edi +; X86-SSE-NEXT: pushl %esi +; X86-SSE-NEXT: subl $36, %esp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl %eax, (%esp) +; X86-SSE-NEXT: calll __fixunsdfti +; X86-SSE-NEXT: subl $4, %esp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-SSE-NEXT: movl %edi, 8(%esi) +; 
X86-SSE-NEXT: movl %edx, 12(%esi) +; X86-SSE-NEXT: movl %eax, (%esi) +; X86-SSE-NEXT: movl %ecx, 4(%esi) +; X86-SSE-NEXT: movl %esi, %eax +; X86-SSE-NEXT: addl $36, %esp +; X86-SSE-NEXT: popl %esi +; X86-SSE-NEXT: popl %edi +; X86-SSE-NEXT: retl $4 +; +; SSE-LABEL: f20u128: +; SSE: # %bb.0: # %entry +; SSE-NEXT: pushq %rax +; SSE-NEXT: callq __fixunsdfti +; SSE-NEXT: popq %rcx +; SSE-NEXT: retq +; +; AVX-LABEL: f20u128: +; AVX: # %bb.0: # %entry +; AVX-NEXT: pushq %rax +; AVX-NEXT: callq __fixunsdfti +; AVX-NEXT: popq %rcx +; AVX-NEXT: retq +entry: + %result = call i128 @llvm.experimental.constrained.fptoui.i128.f64(double %x, + metadata !"fpexcept.strict") #0 + ret i128 %result +} + ; Verify that round(42.1) isn't simplified when the rounding mode is ; unknown. ; Verify that no gross errors happen. @@ -1823,10 +1974,12 @@ declare i8 @llvm.experimental.constrained.fptosi.i8.f64(double, metadata) declare i16 @llvm.experimental.constrained.fptosi.i16.f64(double, metadata) declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) declare i64 @llvm.experimental.constrained.fptosi.i64.f64(double, metadata) +declare i128 @llvm.experimental.constrained.fptosi.i128.f64(double, metadata) declare i8 @llvm.experimental.constrained.fptoui.i8.f64(double, metadata) declare i16 @llvm.experimental.constrained.fptoui.i16.f64(double, metadata) declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata) declare i64 @llvm.experimental.constrained.fptoui.i64.f64(double, metadata) +declare i128 @llvm.experimental.constrained.fptoui.i128.f64(double, metadata) declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) declare i32 @llvm.experimental.constrained.lrint.i32.f64(double, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/fp128-cast-strict.ll b/llvm/test/CodeGen/X86/fp128-cast-strict.ll index 99bca70964cb7..48751e1d9e1af 100644 --- 
a/llvm/test/CodeGen/X86/fp128-cast-strict.ll +++ b/llvm/test/CodeGen/X86/fp128-cast-strict.ll @@ -215,6 +215,18 @@ entry: ret i64 %conv } +define i128 @fptosi_i128(fp128 %x) nounwind strictfp { +; X64-LABEL: fptosi_i128: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: callq __fixtfti +; X64-NEXT: popq %rcx +; X64-NEXT: retq +entry: + %conv = call i128 @llvm.experimental.constrained.fptosi.i128.f128(fp128 %x, metadata !"fpexcept.strict") #0 + ret i128 %conv +} + define i8 @fptoui_i8(fp128 %x) nounwind strictfp { ; X64-LABEL: fptoui_i8: ; X64: # %bb.0: # %entry @@ -265,6 +277,18 @@ entry: ret i64 %conv } +define i128 @fptoui_i128(fp128 %x) nounwind strictfp { +; X64-LABEL: fptoui_i128: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: callq __fixunstfti +; X64-NEXT: popq %rcx +; X64-NEXT: retq +entry: + %conv = call i128 @llvm.experimental.constrained.fptoui.i128.f128(fp128 %x, metadata !"fpexcept.strict") #0 + ret i128 %conv +} + attributes #0 = { strictfp } declare float @llvm.experimental.constrained.fptrunc.f32.f128(fp128, metadata, metadata) @@ -277,7 +301,9 @@ declare i8 @llvm.experimental.constrained.fptosi.i8.f128(fp128, metadata) declare i16 @llvm.experimental.constrained.fptosi.i16.f128(fp128, metadata) declare i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128, metadata) declare i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128, metadata) +declare i128 @llvm.experimental.constrained.fptosi.i128.f128(fp128, metadata) declare i8 @llvm.experimental.constrained.fptoui.i8.f128(fp128, metadata) declare i16 @llvm.experimental.constrained.fptoui.i16.f128(fp128, metadata) declare i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128, metadata) declare i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128, metadata) +declare i128 @llvm.experimental.constrained.fptoui.i128.f128(fp128, metadata) From a7acba29c19ac67c77ed282ec9432602ae21268d Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Thu, 28 Nov 2019 13:50:35 +0900 
Subject: [PATCH 151/591] Use InitLLVM in clang-tidy Update clang-tidy to use InitLLVM, like several other llvm tools that were previously updated. On Windows, this allows clang-tidy to operate on arguments containing characters which cannot be represented in the system's ANSI code page such as filenames with Unicode characters. Fixes bugzilla bug 43751. Patch by Tristan Labelle. Differential Revision: https://reviews.llvm.org/D70694 --- clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp b/clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp index df83de856238f..ad6182def20d2 100644 --- a/clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp +++ b/clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp @@ -18,6 +18,7 @@ #include "../ClangTidyForceLinker.h" #include "../GlobList.h" #include "clang/Tooling/CommonOptionsParser.h" +#include "llvm/Support/InitLLVM.h" #include "llvm/Support/Process.h" #include "llvm/Support/Signals.h" #include "llvm/Support/TargetSelect.h" @@ -327,7 +328,7 @@ getVfsFromFile(const std::string &OverlayFile, } static int clangTidyMain(int argc, const char **argv) { - llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); + llvm::InitLLVM X(argc, argv); CommonOptionsParser OptionsParser(argc, argv, ClangTidyCategory, cl::ZeroOrMore); llvm::IntrusiveRefCntPtr BaseFS( From ed521fef03195084a04657794d919b06947178c6 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 27 Nov 2019 21:15:36 -0800 Subject: [PATCH 152/591] [LegalTypes][X86] Add SoftenFloatOperand support for STRICT_FP_TO_SINT/STRICT_FP_TO_UINT. 
--- .../SelectionDAG/LegalizeFloatTypes.cpp | 30 +- llvm/test/CodeGen/X86/fp128-cast-strict.ll | 260 ++++++++++++++++++ 2 files changed, 282 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index f622f0a6306d8..d0c6021a0b435 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -794,6 +794,8 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break; case ISD::STRICT_LROUND: @@ -905,8 +907,12 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { - bool Signed = N->getOpcode() == ISD::FP_TO_SINT; - EVT SVT = N->getOperand(0).getValueType(); + bool IsStrict = N->isStrictFPOpcode(); + bool Signed = N->getOpcode() == ISD::FP_TO_SINT || + N->getOpcode() == ISD::STRICT_FP_TO_SINT; + + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + EVT SVT = Op.getValueType(); EVT RVT = N->getValueType(0); EVT NVT = EVT(); SDLoc dl(N); @@ -922,18 +928,26 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { NVT = (MVT::SimpleValueType)IntVT; // The type needs to big enough to hold the result. if (NVT.bitsGE(RVT)) - LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT):RTLIB::getFPTOUINT(SVT, NVT); + LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT) : RTLIB::getFPTOUINT(SVT, NVT); } assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!"); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); + Op = GetSoftenedFloat(Op); + SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl).first; + CallOptions.setTypeListBeforeSoften(SVT, RVT, true); + std::pair Tmp = TLI.makeLibCall(DAG, LC, NVT, Op, + CallOptions, dl, Chain); // Truncate the result if the libcall returns a larger type. - return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res); + SDValue Res = DAG.getNode(ISD::TRUNCATE, dl, RVT, Tmp.first); + + if (!IsStrict) + return Res; + + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); } SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { diff --git a/llvm/test/CodeGen/X86/fp128-cast-strict.ll b/llvm/test/CodeGen/X86/fp128-cast-strict.ll index 48751e1d9e1af..2173ff369a927 100644 --- a/llvm/test/CodeGen/X86/fp128-cast-strict.ll +++ b/llvm/test/CodeGen/X86/fp128-cast-strict.ll @@ -5,6 +5,7 @@ ; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-gnu -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX ; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-android -mattr=+avx512f | FileCheck %s --check-prefixes=X64,X64-AVX ; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefixes=X64,X64-AVX +; RUN: llc < %s -disable-strictnode-mutation -mtriple=i686-linux-gnu -mattr=-sse | FileCheck %s --check-prefixes=X86 ; Check soft floating point conversion function calls. 
@@ -31,6 +32,28 @@ define void @TestFPExtF32_F128() nounwind strictfp { ; X64-AVX-NEXT: vmovaps %xmm0, {{.*}}(%rip) ; X64-AVX-NEXT: popq %rax ; X64-AVX-NEXT: retq +; +; X86-LABEL: TestFPExtF32_F128: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: subl $24, %esp +; X86-NEXT: flds vf32 +; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: calll __extendsftf2 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, vf128+8 +; X86-NEXT: movl %edx, vf128+12 +; X86-NEXT: movl %eax, vf128 +; X86-NEXT: movl %ecx, vf128+4 +; X86-NEXT: addl $24, %esp +; X86-NEXT: popl %esi +; X86-NEXT: retl entry: %0 = load float, float* @vf32, align 4 %conv = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %0, metadata !"fpexcept.strict") #0 @@ -56,6 +79,28 @@ define void @TestFPExtF64_F128() nounwind strictfp { ; X64-AVX-NEXT: vmovaps %xmm0, {{.*}}(%rip) ; X64-AVX-NEXT: popq %rax ; X64-AVX-NEXT: retq +; +; X86-LABEL: TestFPExtF64_F128: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: subl $40, %esp +; X86-NEXT: fldl vf64 +; X86-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: calll __extenddftf2 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, vf128+8 +; X86-NEXT: movl %edx, vf128+12 +; X86-NEXT: movl %eax, vf128 +; X86-NEXT: movl %ecx, vf128+4 +; X86-NEXT: addl $40, %esp +; X86-NEXT: popl %esi +; X86-NEXT: retl entry: %0 = load double, double* @vf64, align 8 %conv = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %0, metadata !"fpexcept.strict") #0 @@ -83,6 +128,28 @@ define void 
@TestFPExtF80_F128() nounwind strictfp { ; X64-AVX-NEXT: vmovaps %xmm0, {{.*}}(%rip) ; X64-AVX-NEXT: addq $24, %rsp ; X64-AVX-NEXT: retq +; +; X86-LABEL: TestFPExtF80_F128: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: subl $40, %esp +; X86-NEXT: fldt vf80 +; X86-NEXT: fstpt {{[0-9]+}}(%esp) +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: calll __extendxftf2 +; X86-NEXT: subl $4, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, vf128+8 +; X86-NEXT: movl %edx, vf128+12 +; X86-NEXT: movl %eax, vf128 +; X86-NEXT: movl %ecx, vf128+4 +; X86-NEXT: addl $40, %esp +; X86-NEXT: popl %esi +; X86-NEXT: retl entry: %0 = load x86_fp80, x86_fp80* @vf80, align 8 %conv = call fp128 @llvm.experimental.constrained.fpext.f128.f80(x86_fp80 %0, metadata !"fpexcept.strict") #0 @@ -108,6 +175,19 @@ define void @TestFPTruncF128_F32() nounwind strictfp { ; X64-AVX-NEXT: vmovss %xmm0, {{.*}}(%rip) ; X64-AVX-NEXT: popq %rax ; X64-AVX-NEXT: retq +; +; X86-LABEL: TestFPTruncF128_F32: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl vf128+12 +; X86-NEXT: pushl vf128+8 +; X86-NEXT: pushl vf128+4 +; X86-NEXT: pushl vf128 +; X86-NEXT: calll __trunctfsf2 +; X86-NEXT: addl $16, %esp +; X86-NEXT: fstps vf32 +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl entry: %0 = load fp128, fp128* @vf128, align 16 %conv = call float @llvm.experimental.constrained.fptrunc.f32.f128(fp128 %0, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 @@ -133,6 +213,19 @@ define void @TestFPTruncF128_F64() nounwind strictfp { ; X64-AVX-NEXT: vmovsd %xmm0, {{.*}}(%rip) ; X64-AVX-NEXT: popq %rax ; X64-AVX-NEXT: retq +; +; X86-LABEL: TestFPTruncF128_F64: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl vf128+12 +; X86-NEXT: pushl vf128+8 +; X86-NEXT: pushl vf128+4 +; X86-NEXT: 
pushl vf128 +; X86-NEXT: calll __trunctfdf2 +; X86-NEXT: addl $16, %esp +; X86-NEXT: fstpl vf64 +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl entry: %0 = load fp128, fp128* @vf128, align 16 %conv = call double @llvm.experimental.constrained.fptrunc.f64.f128(fp128 %0, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 @@ -158,6 +251,19 @@ define void @TestFPTruncF128_F80() nounwind strictfp { ; X64-AVX-NEXT: fstpt {{.*}}(%rip) ; X64-AVX-NEXT: popq %rax ; X64-AVX-NEXT: retq +; +; X86-LABEL: TestFPTruncF128_F80: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl vf128+12 +; X86-NEXT: pushl vf128+8 +; X86-NEXT: pushl vf128+4 +; X86-NEXT: pushl vf128 +; X86-NEXT: calll __trunctfxf2 +; X86-NEXT: addl $16, %esp +; X86-NEXT: fstpt vf80 +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl entry: %0 = load fp128, fp128* @vf128, align 16 %conv = call x86_fp80 @llvm.experimental.constrained.fptrunc.f80.f128(fp128 %0, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 @@ -173,6 +279,19 @@ define i8 @fptosi_i8(fp128 %x) nounwind strictfp { ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: popq %rcx ; X64-NEXT: retq +; +; X86-LABEL: fptosi_i8: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll __fixtfsi +; X86-NEXT: addl $16, %esp +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl entry: %conv = call i8 @llvm.experimental.constrained.fptosi.i8.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i8 %conv @@ -186,6 +305,19 @@ define i16 @fptosi_i16(fp128 %x) nounwind strictfp { ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: popq %rcx ; X64-NEXT: retq +; +; X86-LABEL: fptosi_i16: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: 
pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll __fixtfsi +; X86-NEXT: addl $16, %esp +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl entry: %conv = call i16 @llvm.experimental.constrained.fptosi.i16.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i16 %conv @@ -198,6 +330,17 @@ define i32 @fptosi_i32(fp128 %x) nounwind strictfp { ; X64-NEXT: callq __fixtfsi ; X64-NEXT: popq %rcx ; X64-NEXT: retq +; +; X86-LABEL: fptosi_i32: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll __fixtfsi +; X86-NEXT: addl $28, %esp +; X86-NEXT: retl entry: %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i32 %conv @@ -210,6 +353,17 @@ define i64 @fptosi_i64(fp128 %x) nounwind strictfp { ; X64-NEXT: callq __fixtfdi ; X64-NEXT: popq %rcx ; X64-NEXT: retq +; +; X86-LABEL: fptosi_i64: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll __fixtfdi +; X86-NEXT: addl $28, %esp +; X86-NEXT: retl entry: %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i64 %conv @@ -222,6 +376,35 @@ define i128 @fptosi_i128(fp128 %x) nounwind strictfp { ; X64-NEXT: callq __fixtfti ; X64-NEXT: popq %rcx ; X64-NEXT: retq +; +; X86-LABEL: fptosi_i128: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl 
{{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __fixtfti +; X86-NEXT: addl $28, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %conv = call i128 @llvm.experimental.constrained.fptosi.i128.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i128 %conv @@ -235,6 +418,19 @@ define i8 @fptoui_i8(fp128 %x) nounwind strictfp { ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: popq %rcx ; X64-NEXT: retq +; +; X86-LABEL: fptoui_i8: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll __fixunstfsi +; X86-NEXT: addl $16, %esp +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl entry: %conv = call i8 @llvm.experimental.constrained.fptoui.i8.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i8 %conv @@ -248,6 +444,19 @@ define i16 @fptoui_i16(fp128 %x) nounwind strictfp { ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: popq %rcx ; X64-NEXT: retq +; +; X86-LABEL: fptoui_i16: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll __fixunstfsi +; X86-NEXT: addl $16, %esp +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: addl $12, %esp +; X86-NEXT: retl entry: %conv = call i16 @llvm.experimental.constrained.fptoui.i16.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i16 %conv @@ -260,6 +469,17 
@@ define i32 @fptoui_i32(fp128 %x) nounwind strictfp { ; X64-NEXT: callq __fixunstfsi ; X64-NEXT: popq %rcx ; X64-NEXT: retq +; +; X86-LABEL: fptoui_i32: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll __fixunstfsi +; X86-NEXT: addl $28, %esp +; X86-NEXT: retl entry: %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i32 %conv @@ -272,6 +492,17 @@ define i64 @fptoui_i64(fp128 %x) nounwind strictfp { ; X64-NEXT: callq __fixunstfdi ; X64-NEXT: popq %rcx ; X64-NEXT: retq +; +; X86-LABEL: fptoui_i64: +; X86: # %bb.0: # %entry +; X86-NEXT: subl $12, %esp +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll __fixunstfdi +; X86-NEXT: addl $28, %esp +; X86-NEXT: retl entry: %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i64 %conv @@ -284,6 +515,35 @@ define i128 @fptoui_i128(fp128 %x) nounwind strictfp { ; X64-NEXT: callq __fixunstfti ; X64-NEXT: popq %rcx ; X64-NEXT: retq +; +; X86-LABEL: fptoui_i128: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $20, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %eax +; X86-NEXT: calll __fixunstfti +; X86-NEXT: addl $28, %esp +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 12(%esi) +; X86-NEXT: movl %eax, (%esi) 
+; X86-NEXT: movl %ecx, 4(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: addl $20, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl $4 entry: %conv = call i128 @llvm.experimental.constrained.fptoui.i128.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i128 %conv From 735f4793f13d799a1ad480192567a62cc8158bf1 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 27 Nov 2019 22:03:40 -0800 Subject: [PATCH 153/591] [LegalizeTypes] Remove dead code related to softening f16 which we no longer do. f16 is promoted to f32 if it is not legal on the target. Found while reviewing what else needed to be done for strict FP in the softening code. --- .../SelectionDAG/LegalizeFloatTypes.cpp | 26 ------------------- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 - 2 files changed, 27 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index d0c6021a0b435..582ec001cc711 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -546,12 +546,6 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); - if (N->getValueType(0) == MVT::f16) { - // Semi-soften first, to FP_TO_FP16, so that targets which support f16 as a - // storage-only type get a chance to select things. 
- return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, Op); - } - RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); TargetLowering::MakeLibCallOptions CallOptions; @@ -790,7 +784,6 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; - case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break; case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; @@ -833,25 +826,6 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0); } -SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { - // If we get here, the result must be legal but the source illegal. - EVT SVT = N->getOperand(0).getValueType(); - EVT RVT = N->getValueType(0); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - - if (SVT == MVT::f16) - return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), RVT, Op); - - RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall"); - - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first; -} - - SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { // We actually deal with the partially-softened FP_TO_FP16 node too, which // returns an i16 so doesn't meet the constraints necessary for FP_ROUND. 
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 7274a521f468e..2fccf7b9cab6a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -533,7 +533,6 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftenFloatOp_Unary(SDNode *N, RTLIB::Libcall LC); SDValue SoftenFloatOp_BITCAST(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); - SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N); SDValue SoftenFloatOp_LROUND(SDNode *N); From 825debe847d15a5670eff54745a6691145ddfae1 Mon Sep 17 00:00:00 2001 From: Ehud Katz Date: Thu, 28 Nov 2019 08:27:50 +0200 Subject: [PATCH 154/591] [InlineCost] Fix infinite loop in indirect call evaluation Currently every time we encounter an indirect call of a known function, we try to evaluate the inline cost of that function. In case of a recursion, that evaluation never stops. The solution I propose is to evaluate only the indirect call of the function, while any further indirect calls (of a known function) will be treated just as direct function calls, which, actually, never tries to evaluate the call. Fixes PR35469. 
Differential Revision: https://reviews.llvm.org/D69349 --- llvm/lib/Analysis/InlineCost.cpp | 170 +++++++++--------- .../Inline/inline-indirect-chain.ll | 55 ++++++ 2 files changed, 140 insertions(+), 85 deletions(-) create mode 100644 llvm/test/Transforms/Inline/inline-indirect-chain.ll diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index 1ba03de69890b..55ce940bc3a5e 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -51,7 +51,7 @@ static cl::opt InlineThreshold( cl::desc("Control the amount of inlining to perform (default = 225)")); static cl::opt HintThreshold( - "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore, + "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore, cl::desc("Threshold for inlining functions with inline hint")); static cl::opt @@ -63,7 +63,7 @@ static cl::opt // PGO before we actually hook up inliner with analysis passes such as BPI and // BFI. static cl::opt ColdThreshold( - "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore, + "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore, cl::desc("Threshold for inlining functions with cold attribute")); static cl::opt @@ -149,6 +149,9 @@ class CallAnalyzer : public InstVisitor { bool HasUninlineableIntrinsic = false; bool InitsVargArgs = false; + /// Attempt to evaluate indirect calls to boost its inline cost. + bool BoostIndirectCalls; + /// Number of bytes allocated statically by the callee. 
uint64_t AllocatedSize = 0; unsigned NumInstructions = 0; @@ -295,13 +298,14 @@ class CallAnalyzer : public InstVisitor { std::function &GetAssumptionCache, Optional> &GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, - Function &Callee, CallBase &Call, const InlineParams &Params) + Function &Callee, CallBase &Call, const InlineParams &Params, + bool BoostIndirect = true) : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI), PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE), CandidateCall(Call), Params(Params), Threshold(Params.DefaultThreshold), ComputeFullInlineCost(OptComputeFullInlineCost || Params.ComputeFullInlineCost || ORE), - EnableLoadElimination(true) {} + BoostIndirectCalls(BoostIndirect), EnableLoadElimination(true) {} InlineResult analyzeCall(CallBase &Call); @@ -423,9 +427,9 @@ bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) { Operands.push_back(GEP.getOperand(0)); for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) if (Constant *SimpleOp = SimplifiedValues.lookup(*I)) - Operands.push_back(SimpleOp); - else - Operands.push_back(*I); + Operands.push_back(SimpleOp); + else + Operands.push_back(*I); return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands); } @@ -1239,97 +1243,93 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) { if (isa(Call) && cast(Call).cannotDuplicate()) ContainsNoDuplicateCall = true; - if (Function *F = Call.getCalledFunction()) { - // When we have a concrete function, first try to simplify it directly. - if (simplifyCallSite(F, Call)) - return true; - - // Next check if it is an intrinsic we know about. - // FIXME: Lift this into part of the InstVisitor. 
- if (IntrinsicInst *II = dyn_cast(&Call)) { - switch (II->getIntrinsicID()) { - default: - if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II)) - disableLoadElimination(); - return Base::visitCallBase(Call); - - case Intrinsic::load_relative: - // This is normally lowered to 4 LLVM instructions. - addCost(3 * InlineConstants::InstrCost); - return false; + Value *Callee = Call.getCalledOperand(); + Function *F = dyn_cast_or_null(Callee); + bool IsIndirectCall = !F; + if (IsIndirectCall) { + // Check if this happens to be an indirect function call to a known function + // in this inline context. If not, we've done all we can. + F = dyn_cast_or_null(SimplifiedValues.lookup(Callee)); + if (!F) { + // Pay the price of the argument setup. We account for the average 1 + // instruction per call argument setup here. + addCost(Call.arg_size() * InlineConstants::InstrCost); - case Intrinsic::memset: - case Intrinsic::memcpy: - case Intrinsic::memmove: + if (!Call.onlyReadsMemory()) disableLoadElimination(); - // SROA can usually chew through these intrinsics, but they aren't free. - return false; - case Intrinsic::icall_branch_funnel: - case Intrinsic::localescape: - HasUninlineableIntrinsic = true; - return false; - case Intrinsic::vastart: - InitsVargArgs = true; - return false; - } + return Base::visitCallBase(Call); } + } - if (F == Call.getFunction()) { - // This flag will fully abort the analysis, so don't bother with anything - // else. - IsRecursiveCall = true; - return false; - } + assert(F && "Expected a call to a known function"); - if (TTI.isLoweredToCall(F)) { - // We account for the average 1 instruction per call argument setup - // here. - addCost(Call.arg_size() * InlineConstants::InstrCost); + // When we have a concrete function, first try to simplify it directly. + if (simplifyCallSite(F, Call)) + return true; - // Everything other than inline ASM will also have a significant cost - // merely from making the call. 
- if (!isa(Call.getCalledValue())) - addCost(InlineConstants::CallPenalty); - } + // Next check if it is an intrinsic we know about. + // FIXME: Lift this into part of the InstVisitor. + if (IntrinsicInst *II = dyn_cast(&Call)) { + switch (II->getIntrinsicID()) { + default: + if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II)) + disableLoadElimination(); + return Base::visitCallBase(Call); + + case Intrinsic::load_relative: + // This is normally lowered to 4 LLVM instructions. + addCost(3 * InlineConstants::InstrCost); + return false; - if (!Call.onlyReadsMemory()) + case Intrinsic::memset: + case Intrinsic::memcpy: + case Intrinsic::memmove: disableLoadElimination(); - return Base::visitCallBase(Call); + // SROA can usually chew through these intrinsics, but they aren't free. + return false; + case Intrinsic::icall_branch_funnel: + case Intrinsic::localescape: + HasUninlineableIntrinsic = true; + return false; + case Intrinsic::vastart: + InitsVargArgs = true; + return false; + } } - // Otherwise we're in a very special case -- an indirect function call. See - // if we can be particularly clever about this. - Value *Callee = Call.getCalledValue(); - - // First, pay the price of the argument setup. We account for the average - // 1 instruction per call argument setup here. - addCost(Call.arg_size() * InlineConstants::InstrCost); - - // Next, check if this happens to be an indirect function call to a known - // function in this inline context. If not, we've done all we can. - Function *F = dyn_cast_or_null(SimplifiedValues.lookup(Callee)); - if (!F) { - if (!Call.onlyReadsMemory()) - disableLoadElimination(); - return Base::visitCallBase(Call); + if (F == Call.getFunction()) { + // This flag will fully abort the analysis, so don't bother with anything + // else. + IsRecursiveCall = true; + return false; } - // If we have a constant that we are calling as a function, we can peer - // through it and see the function target. 
This happens not infrequently - // during devirtualization and so we want to give it a hefty bonus for - // inlining, but cap that bonus in the event that inlining wouldn't pan - // out. Pretend to inline the function, with a custom threshold. - auto IndirectCallParams = Params; - IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold; - CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, Call, - IndirectCallParams); - if (CA.analyzeCall(Call)) { - // We were able to inline the indirect call! Subtract the cost from the - // threshold to get the bonus we want to apply, but don't go below zero. - Cost -= std::max(0, CA.getThreshold() - CA.getCost()); + if (TTI.isLoweredToCall(F)) { + // We account for the average 1 instruction per call argument setup here. + addCost(Call.arg_size() * InlineConstants::InstrCost); + + // If we have a constant that we are calling as a function, we can peer + // through it and see the function target. This happens not infrequently + // during devirtualization and so we want to give it a hefty bonus for + // inlining, but cap that bonus in the event that inlining wouldn't pan out. + // Pretend to inline the function, with a custom threshold. + if (IsIndirectCall && BoostIndirectCalls) { + auto IndirectCallParams = Params; + IndirectCallParams.DefaultThreshold = + InlineConstants::IndirectCallThreshold; + CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, Call, + IndirectCallParams, false); + if (CA.analyzeCall(Call)) { + // We were able to inline the indirect call! Subtract the cost from the + // threshold to get the bonus we want to apply, but don't go below zero. + Cost -= std::max(0, CA.getThreshold() - CA.getCost()); + } + } else + // Otherwise simply add the cost for merely making the call. 
+ addCost(InlineConstants::CallPenalty); } - if (!F->onlyReadsMemory()) + if (!(Call.onlyReadsMemory() || (IsIndirectCall && F->onlyReadsMemory()))) disableLoadElimination(); return Base::visitCallBase(Call); } @@ -1494,7 +1494,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1; int64_t SwitchCost = - ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost; + ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost; addCost(SwitchCost, (int64_t)CostUpperBound); return false; diff --git a/llvm/test/Transforms/Inline/inline-indirect-chain.ll b/llvm/test/Transforms/Inline/inline-indirect-chain.ll new file mode 100644 index 0000000000000..bf73ad35dade7 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-indirect-chain.ll @@ -0,0 +1,55 @@ +; RUN: opt -inline -early-cse < %s +; This test used to crash (PR35469). + +define void @func1() { + %t = bitcast void ()* @func2 to void ()* + tail call void %t() + ret void +} + +define void @func2() { + %t = bitcast void ()* @func3 to void ()* + tail call void %t() + ret void +} + +define void @func3() { + %t = bitcast void ()* @func4 to void ()* + tail call void %t() + ret void +} + +define void @func4() { + br i1 undef, label %left, label %right + +left: + %t = bitcast void ()* @func5 to void ()* + tail call void %t() + ret void + +right: + ret void +} + +define void @func5() { + %t = bitcast void ()* @func6 to void ()* + tail call void %t() + ret void +} + +define void @func6() { + %t = bitcast void ()* @func2 to void ()* + tail call void %t() + ret void +} + +define void @func7() { + %t = bitcast void ()* @func3 to void ()* + tail call void @func8(void()* %t) + ret void +} + +define void @func8(void()* %f) { + tail call void %f() + ret void +} From c33598d5e547cddcd0b2a8e647570a759e01e02b Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 27 Nov 2019 22:25:59 -0800 Subject: [PATCH 155/591] [JITLink] Make sure MachO/x86-64 handles 32-bit signed 
addends correctly. These need to be sign extended when loading into Edge addends. --- .../ExecutionEngine/JITLink/MachO_x86_64.cpp | 8 ++--- .../JITLink/X86/MachO_x86-64_relocations.s | 35 ++++++++++++------- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp index 9dbfb6556e317..69ec72aae2928 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp @@ -252,7 +252,7 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { TargetSymbol = TargetSymbolOrErr->GraphSymbol; else return TargetSymbolOrErr.takeError(); - Addend = *(const ulittle32_t *)FixupContent; + Addend = *(const little32_t *)FixupContent; break; case Pointer32: if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum)) @@ -284,12 +284,12 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { TargetSymbol = TargetSymbolOrErr->GraphSymbol; else return TargetSymbolOrErr.takeError(); - Addend = *(const ulittle32_t *)FixupContent + + Addend = *(const little32_t *)FixupContent + (1 << (*Kind - PCRel32Minus1)); break; case PCRel32Anon: { JITTargetAddress TargetAddress = - FixupAddress + 4 + *(const ulittle32_t *)FixupContent; + FixupAddress + 4 + *(const little32_t *)FixupContent; if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress)) TargetSymbol = &*TargetSymbolOrErr; else @@ -303,7 +303,7 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { JITTargetAddress Delta = static_cast(1ULL << (*Kind - PCRel32Minus1Anon)); JITTargetAddress TargetAddress = - FixupAddress + 4 + Delta + *(const ulittle32_t *)FixupContent; + FixupAddress + 4 + Delta + *(const little32_t *)FixupContent; if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress)) TargetSymbol = &*TargetSymbolOrErr; else diff --git a/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_relocations.s 
b/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_relocations.s index c97b1ecce6d6d..5fabc6db1218b 100644 --- a/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_relocations.s +++ b/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_relocations.s @@ -40,6 +40,17 @@ test_gotld: movq external_data@GOTPCREL(%rip), %rax retq + +# Check X86_64_RELOC_GOTPCREL handling with cmp instructions, which have +# negative addends. +# +# jitlink-check: decode_operand(test_gotcmpq, 3) = got_addr(macho_reloc.o, external_data) - next_pc(test_gotcmpq) + .globl test_gotcmpq + .align 4, 0x90 +test_gotcmpq: + cmpq $0, external_data@GOTPCREL(%rip) + retq + # Check that calls to external functions trigger the generation of stubs and GOT # entries. # @@ -118,16 +129,16 @@ Lanon_data: # anonymous. # # Note: +8 offset in expression below to accounts for sizeof(Lanon_data). -# jitlink-check: *{8}(section_addr(macho_reloc.o, __data) + 8) = (section_addr(macho_reloc.o, __data) + 8) - named_data + 2 +# jitlink-check: *{8}(section_addr(macho_reloc.o, __data) + 8) = (section_addr(macho_reloc.o, __data) + 8) - named_data - 2 .p2align 3 Lanon_minuend_quad: - .quad Lanon_minuend_quad - named_data + 2 + .quad Lanon_minuend_quad - named_data - 2 # Note: +16 offset in expression below to accounts for sizeof(Lanon_data) + sizeof(Lanon_minuend_long). -# jitlink-check: *{4}(section_addr(macho_reloc.o, __data) + 16) = ((section_addr(macho_reloc.o, __data) + 16) - named_data + 2)[31:0] +# jitlink-check: *{4}(section_addr(macho_reloc.o, __data) + 16) = ((section_addr(macho_reloc.o, __data) + 16) - named_data - 2)[31:0] .p2align 2 Lanon_minuend_long: - .long Lanon_minuend_long - named_data + 2 + .long Lanon_minuend_long - named_data - 2 # Named quad storage target (first named atom in __data). .globl named_data @@ -221,11 +232,11 @@ minuend_long3: # (i.e. is part of an alt_entry chain that includes 'A'). # # Check "A: .long B - C + D" where 'B' is an alt_entry for 'A'. 
-# jitlink-check: *{4}subtractor_with_alt_entry_minuend_long = (subtractor_with_alt_entry_minuend_long_B - named_data + 2)[31:0] +# jitlink-check: *{4}subtractor_with_alt_entry_minuend_long = (subtractor_with_alt_entry_minuend_long_B - named_data - 2)[31:0] .globl subtractor_with_alt_entry_minuend_long .p2align 2 subtractor_with_alt_entry_minuend_long: - .long subtractor_with_alt_entry_minuend_long_B - named_data + 2 + .long subtractor_with_alt_entry_minuend_long_B - named_data - 2 .globl subtractor_with_alt_entry_minuend_long_B .p2align 2 @@ -234,11 +245,11 @@ subtractor_with_alt_entry_minuend_long_B: .long 0 # Check "A: .quad B - C + D" where 'B' is an alt_entry for 'A'. -# jitlink-check: *{8}subtractor_with_alt_entry_minuend_quad = (subtractor_with_alt_entry_minuend_quad_B - named_data + 2) +# jitlink-check: *{8}subtractor_with_alt_entry_minuend_quad = (subtractor_with_alt_entry_minuend_quad_B - named_data - 2) .globl subtractor_with_alt_entry_minuend_quad .p2align 3 subtractor_with_alt_entry_minuend_quad: - .quad subtractor_with_alt_entry_minuend_quad_B - named_data + 2 + .quad subtractor_with_alt_entry_minuend_quad_B - named_data - 2 .globl subtractor_with_alt_entry_minuend_quad_B .p2align 3 @@ -247,11 +258,11 @@ subtractor_with_alt_entry_minuend_quad_B: .quad 0 # Check "A: .long B - C + D" where 'C' is an alt_entry for 'A'. 
-# jitlink-check: *{4}subtractor_with_alt_entry_subtrahend_long = (named_data - subtractor_with_alt_entry_subtrahend_long_B + 2)[31:0] +# jitlink-check: *{4}subtractor_with_alt_entry_subtrahend_long = (named_data - subtractor_with_alt_entry_subtrahend_long_B - 2)[31:0] .globl subtractor_with_alt_entry_subtrahend_long .p2align 2 subtractor_with_alt_entry_subtrahend_long: - .long named_data - subtractor_with_alt_entry_subtrahend_long_B + 2 + .long named_data - subtractor_with_alt_entry_subtrahend_long_B - 2 .globl subtractor_with_alt_entry_subtrahend_long_B .p2align 2 @@ -260,11 +271,11 @@ subtractor_with_alt_entry_subtrahend_long_B: .long 0 # Check "A: .quad B - C + D" where 'B' is an alt_entry for 'A'. -# jitlink-check: *{8}subtractor_with_alt_entry_subtrahend_quad = (named_data - subtractor_with_alt_entry_subtrahend_quad_B + 2) +# jitlink-check: *{8}subtractor_with_alt_entry_subtrahend_quad = (named_data - subtractor_with_alt_entry_subtrahend_quad_B - 2) .globl subtractor_with_alt_entry_subtrahend_quad .p2align 3 subtractor_with_alt_entry_subtrahend_quad: - .quad named_data - subtractor_with_alt_entry_subtrahend_quad_B + 2 + .quad named_data - subtractor_with_alt_entry_subtrahend_quad_B - 2 .globl subtractor_with_alt_entry_subtrahend_quad_B .p2align 3 From 1bc5c52afdcbc6832bfcbe89639b6a662b58901a Mon Sep 17 00:00:00 2001 From: "Wang, Pengfei" Date: Thu, 28 Nov 2019 14:45:39 +0800 Subject: [PATCH 156/591] [X86][NFC] Rename test file for following changes. 
--- .../X86/{mmx-reg-usage.ll => mxcsr-reg-usage.ll} | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) rename llvm/test/CodeGen/X86/{mmx-reg-usage.ll => mxcsr-reg-usage.ll} (95%) diff --git a/llvm/test/CodeGen/X86/mmx-reg-usage.ll b/llvm/test/CodeGen/X86/mxcsr-reg-usage.ll similarity index 95% rename from llvm/test/CodeGen/X86/mmx-reg-usage.ll rename to llvm/test/CodeGen/X86/mxcsr-reg-usage.ll index a8d88c2e9f8e2..da7653255a8d9 100644 --- a/llvm/test/CodeGen/X86/mmx-reg-usage.ll +++ b/llvm/test/CodeGen/X86/mxcsr-reg-usage.ll @@ -1,7 +1,12 @@ ; RUN: llc -march=x86-64 -mattr=+mmx -stop-after finalize-isel -o - %s | FileCheck %s ; This test ensures that the MXCSR is implicitly used by MMX FP instructions. -define x86_mmx @mxcsr_usage(<4 x float> %a0) { +define x86_mmx @mxcsr_mmx(<4 x float> %a0) { +; CHECK: MMX_CVTPS2PIirr %{{[0-9]}}, implicit $mxcsr +; CHECK: MMX_CVTPI2PSirr %{{[0-9]}}, killed %{{[0-9]}}, implicit $mxcsr +; CHECK: MMX_CVTTPS2PIirr killed %{{[0-9]}}, implicit $mxcsr +; CHECK: MMX_CVTPI2PDirr killed %{{[0-9]$}} +; CHECK: MMX_CVTPD2PIirr killed %{{[0-9]}}, implicit $mxcsr %1 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %a0) %2 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a0, x86_mmx %1) %3 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %2) @@ -15,9 +20,3 @@ declare<4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>) declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) - -; CHECK: MMX_CVTPS2PIirr %{{[0-9]}}, implicit $mxcsr -; CHECK: MMX_CVTPI2PSirr %{{[0-9]}}, killed %{{[0-9]}}, implicit $mxcsr -; CHECK: MMX_CVTTPS2PIirr killed %{{[0-9]}}, implicit $mxcsr -; CHECK: MMX_CVTPI2PDirr killed %{{[0-9]$}} -; CHECK: MMX_CVTPD2PIirr killed %{{[0-9]}}, implicit $mxcsr From bb7d75ef1df350948a95d875c9746115f6481a2d Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Wed, 27 Nov 2019 18:26:54 +0300 Subject: [PATCH 157/591] 
[llvm-readelf][llvm-readobj][test] - Cleanup test cases for versioning sections. Currently we have 2 tests for testing versioning sections: 1) elf-versioninfo.test 2) elf-invalid-versioning.test The first one currently checks how versioning sections are dumped + how tools dump invalid SHT_GNU_verdef section. The second despite of its name contains only tests for invalid SHT_GNU_verneed section. In this patch I`ve renamed elf-invalid-versioning.test->elf-verneed-invalid.test, and moved a few tests from elf-versioninfo.test to a new elf-verdef-invalid.test. It will help to maintain these and a new tests for broken versioning sections. Differential revision: --- .../llvm-readobj/elf-verdef-invalid.test | 236 +++++++++++++++++ ...rsioning.test => elf-verneed-invalid.test} | 3 +- .../tools/llvm-readobj/elf-versioninfo.test | 237 +----------------- 3 files changed, 239 insertions(+), 237 deletions(-) create mode 100644 llvm/test/tools/llvm-readobj/elf-verdef-invalid.test rename llvm/test/tools/llvm-readobj/{elf-invalid-versioning.test => elf-verneed-invalid.test} (98%) diff --git a/llvm/test/tools/llvm-readobj/elf-verdef-invalid.test b/llvm/test/tools/llvm-readobj/elf-verdef-invalid.test new file mode 100644 index 0000000000000..493537464a008 --- /dev/null +++ b/llvm/test/tools/llvm-readobj/elf-verdef-invalid.test @@ -0,0 +1,236 @@ +## Test how llvm-readobj/llvm-readelf tools handle invalid SHT_GNU_verdef sections. + +## Check that we report a warning when sh_link references a non-existent section. + +# RUN: yaml2obj %s --docnum=1 -o %t1 +# RUN: llvm-readobj -V %t1 2>&1 | FileCheck %s --check-prefix=INVALID-LINK-LLVM -DFILE=%t1 +# RUN: not llvm-readelf -V %t1 2>&1 | FileCheck %s --check-prefix=INVALID-LINK-GNU -DFILE=%t1 + +# INVALID-LINK-LLVM: warning: '[[FILE]]': invalid section linked to SHT_GNU_verdef section with index 1: invalid section index: 255 + +## TODO: llvm-readelf should also report a meaningful warning instead of an error. 
+# INVALID-LINK-GNU: Version definition +# INVALID-LINK-GNU: error: '[[FILE]]': invalid section index: 255 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Link: 0xFF + Info: 0x0 + Entries: [] + +## Check that we report a warning when the sh_link field of a SHT_GNU_verdef section references a non-string table section. + +# RUN: yaml2obj %s --docnum=2 -o %t2 +# RUN: llvm-readobj -V %t2 2>&1 | FileCheck %s --check-prefix=INVALID-STRING-TABLE -DFILE=%t2 +# RUN: llvm-readelf -V %t2 2>&1 | FileCheck %s --check-prefix=INVALID-STRING-TABLE -DFILE=%t2 + +# INVALID-STRING-TABLE: warning: '[[FILE]]': invalid string table linked to SHT_GNU_verdef section with index 1: invalid sh_type for string table section [index 0]: expected SHT_STRTAB, but got SHT_NULL + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Link: 0x0 + Info: 0x0 + Entries: [] + +## Check that we report a warning when we can't read the content of the SHT_GNU_verdef section. + +# RUN: yaml2obj %s --docnum=3 -o %t3 +# RUN: llvm-readobj -V %t3 2>&1 | FileCheck %s --check-prefix=INVALID-DATA -DFILE=%t3 +# RUN: llvm-readelf -V %t3 2>&1 | FileCheck %s --check-prefix=INVALID-DATA -DFILE=%t3 + +# INVALID-DATA: warning: '[[FILE]]': cannot read content of SHT_GNU_verdef section with index 1: section [index 1] has a sh_offset (0xffffffff) + sh_size (0x0) that cannot be represented + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Link: .dynstr + Info: 0x0 + Entries: [] + ShOffset: 0xFFFFFFFF +DynamicSymbols: + - Name: foo + +## Check that we report a warning when a SHT_GNU_verdef section contains a version definition +## that goes past the end of the section. 
+ +# RUN: yaml2obj %s --docnum=4 -o %t4 +# RUN: llvm-readobj -V %t4 2>&1 | FileCheck %s --check-prefix=DEF-PAST-END -DFILE=%t4 +# RUN: llvm-readelf -V %t4 2>&1 | FileCheck %s --check-prefix=DEF-PAST-END -DFILE=%t4 + +# DEF-PAST-END: warning: '[[FILE]]': invalid SHT_GNU_verdef section with index 1: version definition 1 goes past the end of the section + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Link: .dynstr + Info: 0x1 + Entries: + - Version: 0 + Flags: 0 + VersionNdx: 0 + Hash: 0 + Names: + - FOO + ShSize: 1 +DynamicSymbols: + - Name: foo + +## Check that we report a warning when a SHT_GNU_verdef section contains a version definition +## that refers to an auxiliary entry that goes past the end of the section. + +# RUN: yaml2obj %s --docnum=5 -o %t5 +# RUN: llvm-readobj -V %t5 2>&1 | FileCheck %s --check-prefix=AUX-PAST-END -DFILE=%t5 +# RUN: llvm-readelf -V %t5 2>&1 | FileCheck %s --check-prefix=AUX-PAST-END -DFILE=%t5 + +# AUX-PAST-END: warning: '[[FILE]]': invalid SHT_GNU_verdef section with index 1: version definition 1 refers to an auxiliary entry that goes past the end of the section + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Link: .dynstr + Info: 0x1 + Entries: + - Version: 0 + Flags: 0 + VersionNdx: 0 + Hash: 0 + Names: + - FOO + ShSize: 21 +DynamicSymbols: + - Name: foo + +## Check that we can dump a SHT_GNU_verdef section properly even if it contains version names strings +## that overrun the linked string table. 
+ +# RUN: yaml2obj %s --docnum=6 -o %t6 +# RUN: llvm-readobj -V %t6 2>&1 | FileCheck %s --check-prefix=PAST-STRTAB-END-LLVM --implicit-check-not="warning:" -DFILE=%t6 +# RUN: llvm-readelf -V %t6 2>&1 | FileCheck %s --check-prefix=PAST-STRTAB-END-GNU --implicit-check-not="warning:" -DFILE=%t6 + +# PAST-STRTAB-END-LLVM: VersionDefinitions [ +# PAST-STRTAB-END-LLVM-NEXT: Definition { +# PAST-STRTAB-END-LLVM-NEXT: Version: 0 +# PAST-STRTAB-END-LLVM-NEXT: Flags [ (0x0) +# PAST-STRTAB-END-LLVM-NEXT: ] +# PAST-STRTAB-END-LLVM-NEXT: Index: 0 +# PAST-STRTAB-END-LLVM-NEXT: Hash: 0 +# PAST-STRTAB-END-LLVM-NEXT: Name: +# PAST-STRTAB-END-LLVM-NEXT: Predecessors: [] +# PAST-STRTAB-END-LLVM-NEXT: } +# PAST-STRTAB-END-LLVM-NEXT: ] + +# PAST-STRTAB-END-GNU: Version definition section '.gnu.version_d' contains 1 entries: +# PAST-STRTAB-END-GNU-NEXT: Addr: 0000000000000000 Offset: 0x000040 Link: 2 (.strtab) +# PAST-STRTAB-END-GNU-NEXT: 0x0000: Rev: 0 Flags: none Index: 0 Cnt: 1 Name: + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Link: .strtab + Info: 0x1 + Entries: + - Version: 0 + Flags: 0 + VersionNdx: 0 + Hash: 0 + Names: + - FOO + - Name: .strtab + Type: SHT_STRTAB +DynamicSymbols: + - Name: BAR + +## Check we report a warning when a version definition is not correctly aligned in memory. 
+ +# RUN: yaml2obj %s --docnum=7 -o %t7 +# RUN: llvm-readobj -V %t7 2>&1 | FileCheck %s --check-prefix=MISALIGNED-DEF -DFILE=%t7 +# RUN: llvm-readelf -V %t7 2>&1 | FileCheck %s --check-prefix=MISALIGNED-DEF -DFILE=%t7 + +# MISALIGNED-DEF: warning: '[[FILE]]': invalid SHT_GNU_verdef section with index 1: found a misaligned version definition entry at offset 0x0 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Type: Fill + Size: 0x1 + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Link: .dynstr + Info: 0x1 + Entries: + - Version: 0 + Flags: 0 + VersionNdx: 0 + Hash: 0 + Names: + - FOO +DynamicSymbols: + - Name: foo + +## Check we report a warning when an auxiliary entry is not correctly aligned in memory. + +# RUN: yaml2obj %s --docnum=8 -o %t8 +# RUN: llvm-readobj -V %t8 2>&1 | FileCheck %s --check-prefix=MISALIGNED-AUX -DFILE=%t8 +# RUN: llvm-readelf -V %t8 2>&1 | FileCheck %s --check-prefix=MISALIGNED-AUX -DFILE=%t8 + +# MISALIGNED-AUX: warning: '[[FILE]]': invalid SHT_GNU_verdef section with index 1: found a misaligned auxiliary entry at offset 0x13 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Flags: [ SHF_ALLOC ] + Link: .dynstr + Info: 0x1 +## The byte offset to the auxiliary entry is 0x13, i.e. it is not correctly aligned in memory. 
+ Content: "0000000000000100000000001300000000000000" +DynamicSymbols: + - Name: foo + Binding: STB_GLOBAL diff --git a/llvm/test/tools/llvm-readobj/elf-invalid-versioning.test b/llvm/test/tools/llvm-readobj/elf-verneed-invalid.test similarity index 98% rename from llvm/test/tools/llvm-readobj/elf-invalid-versioning.test rename to llvm/test/tools/llvm-readobj/elf-verneed-invalid.test index d7a5198df1005..971cada0195a5 100644 --- a/llvm/test/tools/llvm-readobj/elf-invalid-versioning.test +++ b/llvm/test/tools/llvm-readobj/elf-verneed-invalid.test @@ -1,5 +1,4 @@ -## Here we test how llvm-readelf/llvm-readobj behave when inputs have -## invalid versioning sections. +## Test how llvm-readobj/llvm-readelf tools handle invalid SHT_GNU_verneed sections. ## In the first case we have a SHT_GNU_versym section that refers to ## a version listed in a SHT_GNU_verneed section. That version has an diff --git a/llvm/test/tools/llvm-readobj/elf-versioninfo.test b/llvm/test/tools/llvm-readobj/elf-versioninfo.test index 591dfe27a1a86..a7eaa80bb5df6 100644 --- a/llvm/test/tools/llvm-readobj/elf-versioninfo.test +++ b/llvm/test/tools/llvm-readobj/elf-versioninfo.test @@ -1,4 +1,6 @@ ## Test how llvm-readobj/llvm-readelf tools dump versioning sections. +## Check that SHT_GNU_versym dumper can see versions described in +## SHT_GNU_verdef and SHT_GNU_verneed sections. # RUN: yaml2obj %s --docnum=1 -o %t1 # RUN: llvm-readobj -V %t1 | FileCheck %s --check-prefix=LLVM @@ -293,238 +295,3 @@ DynamicSymbols: # GNU-NEXT: 0x0050: Name: v2 Flags: Version: 5 # GNU-NEXT: 0x0060: Version: 1 File: verneed2.so.0 Cnt: 1 # GNU-NEXT: 0x0070: Name: v3 Flags: none Version: 6 - -## Check that we report a warning when sh_link references a non-existent section. 
- -# RUN: yaml2obj %s --docnum=2 -o %t2 -# RUN: llvm-readobj -V %t2 2>&1 | FileCheck %s --check-prefix=INVALID-LINK-LLVM -DFILE=%t2 -# RUN: not llvm-readelf -V %t2 2>&1 | FileCheck %s --check-prefix=INVALID-LINK-GNU -DFILE=%t2 - -# INVALID-LINK-LLVM: warning: '[[FILE]]': invalid section linked to SHT_GNU_verdef section with index 1: invalid section index: 255 - -## TODO: llvm-readelf should also report a meaningful warning instead of an error. -# INVALID-LINK-GNU: Version definition -# INVALID-LINK-GNU: error: '[[FILE]]': invalid section index: 255 - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN - Machine: EM_X86_64 -Sections: - - Name: .gnu.version_d - Type: SHT_GNU_verdef - Link: 0xFF - Info: 0x0 - Entries: [] - -## Check that we report a warning when the sh_link field of a SHT_GNU_verdef section references a non-string table section. - -# RUN: yaml2obj %s --docnum=3 -o %t3 -# RUN: llvm-readobj -V %t3 2>&1 | FileCheck %s --check-prefix=INVALID-STRING-TABLE -DFILE=%t3 -# RUN: llvm-readelf -V %t3 2>&1 | FileCheck %s --check-prefix=INVALID-STRING-TABLE -DFILE=%t3 - -# INVALID-STRING-TABLE: warning: '[[FILE]]': invalid string table linked to SHT_GNU_verdef section with index 1: invalid sh_type for string table section [index 0]: expected SHT_STRTAB, but got SHT_NULL - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN - Machine: EM_X86_64 -Sections: - - Name: .gnu.version_d - Type: SHT_GNU_verdef - Link: 0x0 - Info: 0x0 - Entries: [] - -## Check that we report a warning when we can't read the content of the SHT_GNU_verdef section. 
- -# RUN: yaml2obj %s --docnum=4 -o %t4 -# RUN: llvm-readobj -V %t4 2>&1 | FileCheck %s --check-prefix=INVALID-DATA -DFILE=%t4 -# RUN: llvm-readelf -V %t4 2>&1 | FileCheck %s --check-prefix=INVALID-DATA -DFILE=%t4 - -# INVALID-DATA: warning: '[[FILE]]': cannot read content of SHT_GNU_verdef section with index 1: section [index 1] has a sh_offset (0xffffffff) + sh_size (0x0) that cannot be represented - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN - Machine: EM_X86_64 -Sections: - - Name: .gnu.version_d - Type: SHT_GNU_verdef - Link: .dynstr - Info: 0x0 - Entries: [] - ShOffset: 0xFFFFFFFF -DynamicSymbols: - - Name: foo - -## Check that we report a warning when a SHT_GNU_verdef section contains a version definition -## that goes past the end of the section. - -# RUN: yaml2obj %s --docnum=5 -o %t5 -# RUN: llvm-readobj -V %t5 2>&1 | FileCheck %s --check-prefix=DEF-PAST-END -DFILE=%t5 -# RUN: llvm-readelf -V %t5 2>&1 | FileCheck %s --check-prefix=DEF-PAST-END -DFILE=%t5 - -# DEF-PAST-END: warning: '[[FILE]]': invalid SHT_GNU_verdef section with index 1: version definition 1 goes past the end of the section - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN - Machine: EM_X86_64 -Sections: - - Name: .gnu.version_d - Type: SHT_GNU_verdef - Link: .dynstr - Info: 0x1 - Entries: - - Version: 0 - Flags: 0 - VersionNdx: 0 - Hash: 0 - Names: - - FOO - ShSize: 1 -DynamicSymbols: - - Name: foo - -## Check that we report a warning when a SHT_GNU_verdef section contains a version definition -## that refers to an auxiliary entry that goes past the end of the section. 
- -# RUN: yaml2obj %s --docnum=6 -o %t6 -# RUN: llvm-readobj -V %t6 2>&1 | FileCheck %s --check-prefix=AUX-PAST-END -DFILE=%t6 -# RUN: llvm-readelf -V %t6 2>&1 | FileCheck %s --check-prefix=AUX-PAST-END -DFILE=%t6 - -# AUX-PAST-END: warning: '[[FILE]]': invalid SHT_GNU_verdef section with index 1: version definition 1 refers to an auxiliary entry that goes past the end of the section - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN - Machine: EM_X86_64 -Sections: - - Name: .gnu.version_d - Type: SHT_GNU_verdef - Link: .dynstr - Info: 0x1 - Entries: - - Version: 0 - Flags: 0 - VersionNdx: 0 - Hash: 0 - Names: - - FOO - ShSize: 21 -DynamicSymbols: - - Name: foo - -## Check that we can dump a SHT_GNU_verdef section properly even if it contains version names strings -## that overrun the linked string table. - -# RUN: yaml2obj %s --docnum=7 -o %t7 -# RUN: llvm-readobj -V %t7 2>&1 | FileCheck %s --check-prefix=PAST-STRTAB-END-LLVM --implicit-check-not="warning:" -DFILE=%t7 -# RUN: llvm-readelf -V %t7 2>&1 | FileCheck %s --check-prefix=PAST-STRTAB-END-GNU --implicit-check-not="warning:" -DFILE=%t7 - -# PAST-STRTAB-END-LLVM: VersionDefinitions [ -# PAST-STRTAB-END-LLVM-NEXT: Definition { -# PAST-STRTAB-END-LLVM-NEXT: Version: 0 -# PAST-STRTAB-END-LLVM-NEXT: Flags [ (0x0) -# PAST-STRTAB-END-LLVM-NEXT: ] -# PAST-STRTAB-END-LLVM-NEXT: Index: 0 -# PAST-STRTAB-END-LLVM-NEXT: Hash: 0 -# PAST-STRTAB-END-LLVM-NEXT: Name: -# PAST-STRTAB-END-LLVM-NEXT: Predecessors: [] -# PAST-STRTAB-END-LLVM-NEXT: } -# PAST-STRTAB-END-LLVM-NEXT: ] - -# PAST-STRTAB-END-GNU: Version definition section '.gnu.version_d' contains 1 entries: -# PAST-STRTAB-END-GNU-NEXT: Addr: 0000000000000000 Offset: 0x000040 Link: 2 (.strtab) -# PAST-STRTAB-END-GNU-NEXT: 0x0000: Rev: 0 Flags: none Index: 0 Cnt: 1 Name: - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN - Machine: EM_X86_64 -Sections: - - Name: .gnu.version_d - Type: SHT_GNU_verdef - Link: 
.strtab - Info: 0x1 - Entries: - - Version: 0 - Flags: 0 - VersionNdx: 0 - Hash: 0 - Names: - - FOO - - Name: .strtab - Type: SHT_STRTAB -DynamicSymbols: - - Name: BAR - -## Check we report a warning when a version definition is not correctly aligned in memory. - -# RUN: yaml2obj %s --docnum=8 -o %t8 -# RUN: llvm-readobj -V %t8 2>&1 | FileCheck %s --check-prefix=MISALIGNED-DEF -DFILE=%t8 -# RUN: llvm-readelf -V %t8 2>&1 | FileCheck %s --check-prefix=MISALIGNED-DEF -DFILE=%t8 - -# MISALIGNED-DEF: warning: '[[FILE]]': invalid SHT_GNU_verdef section with index 1: found a misaligned version definition entry at offset 0x0 - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN - Machine: EM_X86_64 -Sections: - - Type: Fill - Size: 0x1 - - Name: .gnu.version_d - Type: SHT_GNU_verdef - Link: .dynstr - Info: 0x1 - Entries: - - Version: 0 - Flags: 0 - VersionNdx: 0 - Hash: 0 - Names: - - FOO -DynamicSymbols: - - Name: foo - -## Check we report a warning when an auxiliary entry is not correctly aligned in memory. - -# RUN: yaml2obj %s --docnum=9 -o %t9 -# RUN: llvm-readobj -V %t9 2>&1 | FileCheck %s --check-prefix=MISALIGNED-AUX -DFILE=%t9 -# RUN: llvm-readelf -V %t9 2>&1 | FileCheck %s --check-prefix=MISALIGNED-AUX -DFILE=%t9 - -# MISALIGNED-AUX: warning: '[[FILE]]': invalid SHT_GNU_verdef section with index 1: found a misaligned auxiliary entry at offset 0x13 - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_DYN - Machine: EM_X86_64 -Sections: - - Name: .gnu.version_d - Type: SHT_GNU_verdef - Flags: [ SHF_ALLOC ] - Link: .dynstr - Info: 0x1 -## The byte offset to the auxiliary entry is 0x13, i.e. it is not correctly aligned in memory. 
- Content: "0000000000000100000000001300000000000000" -DynamicSymbols: - - Name: foo - Binding: STB_GLOBAL From b44e91a472526f01d67ee9ce5de2561216782330 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Thu, 28 Nov 2019 10:21:47 +0100 Subject: [PATCH 158/591] [lldb] Remove debugging code used for LLDB_DWARF_DONT_COMPLETE_TYPENAMES Reviewers: labath, clayborg, shafik Reviewed By: labath Subscribers: JDevlieghere, lldb-commits Tags: #lldb Differential Revision: https://reviews.llvm.org/D70802 --- .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 33 ------------------- 1 file changed, 33 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index df5c81f2e8305..51fa90322cf07 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -1984,39 +1984,6 @@ bool DWARFASTParserClang::CompleteTypeFromDWARF(const DWARFDIE &die, if (!die) return false; -#if defined LLDB_CONFIGURATION_DEBUG - // For debugging purposes, the LLDB_DWARF_DONT_COMPLETE_TYPENAMES environment - // variable can be set with one or more typenames separated by ';' - // characters. This will cause this function to not complete any types whose - // names match. 
- // - // Examples of setting this environment variable: - // - // LLDB_DWARF_DONT_COMPLETE_TYPENAMES=Foo - // LLDB_DWARF_DONT_COMPLETE_TYPENAMES=Foo;Bar;Baz - const char *dont_complete_typenames_cstr = - getenv("LLDB_DWARF_DONT_COMPLETE_TYPENAMES"); - if (dont_complete_typenames_cstr && dont_complete_typenames_cstr[0]) { - const char *die_name = die.GetName(); - if (die_name && die_name[0]) { - const char *match = strstr(dont_complete_typenames_cstr, die_name); - if (match) { - size_t die_name_length = strlen(die_name); - while (match) { - const char separator_char = ';'; - const char next_char = match[die_name_length]; - if (next_char == '\0' || next_char == separator_char) { - if (match == dont_complete_typenames_cstr || - match[-1] == separator_char) - return false; - } - match = strstr(match + 1, die_name); - } - } - } - } -#endif - const dw_tag_t tag = die.Tag(); Log *log = From 2e5bb6d8d944767aa777e70dda8e4ce2f600d2f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Wed, 27 Nov 2019 14:08:01 +0200 Subject: [PATCH 159/591] [LLDB] [PECOFF] Factorize mapping section names to types using StringSwitch. NFCI. Keep the existing special cases based on combinations of section name, flags and sizes/offsets. 
Differential Revision: https://reviews.llvm.org/D70778 --- .../ObjectFile/PECOFF/ObjectFilePECOFF.cpp | 171 ++++++++---------- .../ObjectFile/PECOFF/ObjectFilePECOFF.h | 2 + 2 files changed, 75 insertions(+), 98 deletions(-) diff --git a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp index 37e1120838f37..84ecd5e019fc3 100644 --- a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp +++ b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp @@ -787,6 +787,76 @@ bool ObjectFilePECOFF::IsStripped() { return false; } +SectionType ObjectFilePECOFF::GetSectionType(llvm::StringRef sect_name, + const section_header_t §) { + ConstString const_sect_name(sect_name); + static ConstString g_code_sect_name(".code"); + static ConstString g_CODE_sect_name("CODE"); + static ConstString g_data_sect_name(".data"); + static ConstString g_DATA_sect_name("DATA"); + static ConstString g_bss_sect_name(".bss"); + static ConstString g_BSS_sect_name("BSS"); + + if (sect.flags & llvm::COFF::IMAGE_SCN_CNT_CODE && + ((const_sect_name == g_code_sect_name) || + (const_sect_name == g_CODE_sect_name))) { + return eSectionTypeCode; + } + if (sect.flags & llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA && + ((const_sect_name == g_data_sect_name) || + (const_sect_name == g_DATA_sect_name))) { + if (sect.size == 0 && sect.offset == 0) + return eSectionTypeZeroFill; + else + return eSectionTypeData; + } + if (sect.flags & llvm::COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA && + ((const_sect_name == g_bss_sect_name) || + (const_sect_name == g_BSS_sect_name))) { + if (sect.size == 0) + return eSectionTypeZeroFill; + else + return eSectionTypeData; + } + + SectionType section_type = + llvm::StringSwitch(sect_name) + .Case(".debug", eSectionTypeDebug) + .Case(".stabstr", eSectionTypeDataCString) + .Case(".reloc", eSectionTypeOther) + .Case(".debug_abbrev", eSectionTypeDWARFDebugAbbrev) + .Case(".debug_aranges", 
eSectionTypeDWARFDebugAranges) + .Case(".debug_frame", eSectionTypeDWARFDebugFrame) + .Case(".debug_info", eSectionTypeDWARFDebugInfo) + .Case(".debug_line", eSectionTypeDWARFDebugLine) + .Case(".debug_loc", eSectionTypeDWARFDebugLoc) + .Case(".debug_loclists", eSectionTypeDWARFDebugLocLists) + .Case(".debug_macinfo", eSectionTypeDWARFDebugMacInfo) + .Case(".debug_names", eSectionTypeDWARFDebugNames) + .Case(".debug_pubnames", eSectionTypeDWARFDebugPubNames) + .Case(".debug_pubtypes", eSectionTypeDWARFDebugPubTypes) + .Case(".debug_ranges", eSectionTypeDWARFDebugRanges) + .Case(".debug_str", eSectionTypeDWARFDebugStr) + .Case(".debug_types", eSectionTypeDWARFDebugTypes) + .Case(".eh_frame", eSectionTypeEHFrame) + .Case(".gosymtab", eSectionTypeGoSymtab) + .Default(eSectionTypeInvalid); + if (section_type != eSectionTypeInvalid) + return section_type; + + if (sect.flags & llvm::COFF::IMAGE_SCN_CNT_CODE) + return eSectionTypeCode; + if (sect.flags & llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA) + return eSectionTypeData; + if (sect.flags & llvm::COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) { + if (sect.size == 0) + return eSectionTypeZeroFill; + else + return eSectionTypeData; + } + return eSectionTypeOther; +} + void ObjectFilePECOFF::CreateSections(SectionList &unified_section_list) { if (m_sections_up) return; @@ -810,104 +880,9 @@ void ObjectFilePECOFF::CreateSections(SectionList &unified_section_list) { const uint32_t nsects = m_sect_headers.size(); ModuleSP module_sp(GetModule()); for (uint32_t idx = 0; idx < nsects; ++idx) { - ConstString const_sect_name(GetSectionName(m_sect_headers[idx])); - static ConstString g_code_sect_name(".code"); - static ConstString g_CODE_sect_name("CODE"); - static ConstString g_data_sect_name(".data"); - static ConstString g_DATA_sect_name("DATA"); - static ConstString g_bss_sect_name(".bss"); - static ConstString g_BSS_sect_name("BSS"); - static ConstString g_debug_sect_name(".debug"); - static ConstString g_reloc_sect_name(".reloc"); - 
static ConstString g_stab_sect_name(".stab"); - static ConstString g_stabstr_sect_name(".stabstr"); - static ConstString g_sect_name_dwarf_debug_abbrev(".debug_abbrev"); - static ConstString g_sect_name_dwarf_debug_aranges(".debug_aranges"); - static ConstString g_sect_name_dwarf_debug_frame(".debug_frame"); - static ConstString g_sect_name_dwarf_debug_info(".debug_info"); - static ConstString g_sect_name_dwarf_debug_line(".debug_line"); - static ConstString g_sect_name_dwarf_debug_loc(".debug_loc"); - static ConstString g_sect_name_dwarf_debug_loclists(".debug_loclists"); - static ConstString g_sect_name_dwarf_debug_macinfo(".debug_macinfo"); - static ConstString g_sect_name_dwarf_debug_names(".debug_names"); - static ConstString g_sect_name_dwarf_debug_pubnames(".debug_pubnames"); - static ConstString g_sect_name_dwarf_debug_pubtypes(".debug_pubtypes"); - static ConstString g_sect_name_dwarf_debug_ranges(".debug_ranges"); - static ConstString g_sect_name_dwarf_debug_str(".debug_str"); - static ConstString g_sect_name_dwarf_debug_types(".debug_types"); - static ConstString g_sect_name_eh_frame(".eh_frame"); - static ConstString g_sect_name_go_symtab(".gosymtab"); - SectionType section_type = eSectionTypeOther; - if (m_sect_headers[idx].flags & llvm::COFF::IMAGE_SCN_CNT_CODE && - ((const_sect_name == g_code_sect_name) || - (const_sect_name == g_CODE_sect_name))) { - section_type = eSectionTypeCode; - } else if (m_sect_headers[idx].flags & - llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA && - ((const_sect_name == g_data_sect_name) || - (const_sect_name == g_DATA_sect_name))) { - if (m_sect_headers[idx].size == 0 && m_sect_headers[idx].offset == 0) - section_type = eSectionTypeZeroFill; - else - section_type = eSectionTypeData; - } else if (m_sect_headers[idx].flags & - llvm::COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA && - ((const_sect_name == g_bss_sect_name) || - (const_sect_name == g_BSS_sect_name))) { - if (m_sect_headers[idx].size == 0) - section_type = 
eSectionTypeZeroFill; - else - section_type = eSectionTypeData; - } else if (const_sect_name == g_debug_sect_name) { - section_type = eSectionTypeDebug; - } else if (const_sect_name == g_stabstr_sect_name) { - section_type = eSectionTypeDataCString; - } else if (const_sect_name == g_reloc_sect_name) { - section_type = eSectionTypeOther; - } else if (const_sect_name == g_sect_name_dwarf_debug_abbrev) - section_type = eSectionTypeDWARFDebugAbbrev; - else if (const_sect_name == g_sect_name_dwarf_debug_aranges) - section_type = eSectionTypeDWARFDebugAranges; - else if (const_sect_name == g_sect_name_dwarf_debug_frame) - section_type = eSectionTypeDWARFDebugFrame; - else if (const_sect_name == g_sect_name_dwarf_debug_info) - section_type = eSectionTypeDWARFDebugInfo; - else if (const_sect_name == g_sect_name_dwarf_debug_line) - section_type = eSectionTypeDWARFDebugLine; - else if (const_sect_name == g_sect_name_dwarf_debug_loc) - section_type = eSectionTypeDWARFDebugLoc; - else if (const_sect_name == g_sect_name_dwarf_debug_loclists) - section_type = eSectionTypeDWARFDebugLocLists; - else if (const_sect_name == g_sect_name_dwarf_debug_macinfo) - section_type = eSectionTypeDWARFDebugMacInfo; - else if (const_sect_name == g_sect_name_dwarf_debug_names) - section_type = eSectionTypeDWARFDebugNames; - else if (const_sect_name == g_sect_name_dwarf_debug_pubnames) - section_type = eSectionTypeDWARFDebugPubNames; - else if (const_sect_name == g_sect_name_dwarf_debug_pubtypes) - section_type = eSectionTypeDWARFDebugPubTypes; - else if (const_sect_name == g_sect_name_dwarf_debug_ranges) - section_type = eSectionTypeDWARFDebugRanges; - else if (const_sect_name == g_sect_name_dwarf_debug_str) - section_type = eSectionTypeDWARFDebugStr; - else if (const_sect_name == g_sect_name_dwarf_debug_types) - section_type = eSectionTypeDWARFDebugTypes; - else if (const_sect_name == g_sect_name_eh_frame) - section_type = eSectionTypeEHFrame; - else if (const_sect_name == g_sect_name_go_symtab) 
- section_type = eSectionTypeGoSymtab; - else if (m_sect_headers[idx].flags & llvm::COFF::IMAGE_SCN_CNT_CODE) { - section_type = eSectionTypeCode; - } else if (m_sect_headers[idx].flags & - llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA) { - section_type = eSectionTypeData; - } else if (m_sect_headers[idx].flags & - llvm::COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) { - if (m_sect_headers[idx].size == 0) - section_type = eSectionTypeZeroFill; - else - section_type = eSectionTypeData; - } + llvm::StringRef sect_name = GetSectionName(m_sect_headers[idx]); + ConstString const_sect_name(sect_name); + SectionType section_type = GetSectionType(sect_name, m_sect_headers[idx]); SectionSP section_sp(new Section( module_sp, // Module to which this section belongs diff --git a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.h b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.h index 78088ecc43778..c0efe702f5700 100644 --- a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.h +++ b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.h @@ -283,6 +283,8 @@ class ObjectFilePECOFF : public lldb_private::ObjectFile { void DumpDependentModules(lldb_private::Stream *s); llvm::StringRef GetSectionName(const section_header_t §); + static lldb::SectionType GetSectionType(llvm::StringRef sect_name, + const section_header_t §); typedef std::vector SectionHeaderColl; typedef SectionHeaderColl::iterator SectionHeaderCollIter; From 934c025e9bdd28db544dfd57783d0fb8907a92d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Wed, 16 Oct 2019 00:01:15 +0300 Subject: [PATCH 160/591] [LLDB] [PECOFF] Look for the truncated ".eh_fram" section name COFF section names can either be stored truncated to 8 chars, in the section header, or as a longer section name, stored separately in the string table. 
libunwind locates the .eh_frame section by runtime introspection, which only works for section names stored in the section header (as the string table isn't mapped at runtime). To support this behaviour, lld always truncates the section names for sections that will be mapped, like .eh_frame. Differential Revision: https://reviews.llvm.org/D70745 --- .../ObjectFile/PECOFF/ObjectFilePECOFF.cpp | 3 +- .../ObjectFile/PECOFF/section-types.yaml | 92 +++++++++++++++++++ 2 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 lldb/test/Shell/ObjectFile/PECOFF/section-types.yaml diff --git a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp index 84ecd5e019fc3..6978a31fb2e51 100644 --- a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp +++ b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp @@ -838,7 +838,8 @@ SectionType ObjectFilePECOFF::GetSectionType(llvm::StringRef sect_name, .Case(".debug_ranges", eSectionTypeDWARFDebugRanges) .Case(".debug_str", eSectionTypeDWARFDebugStr) .Case(".debug_types", eSectionTypeDWARFDebugTypes) - .Case(".eh_frame", eSectionTypeEHFrame) + // .eh_frame can be truncated to 8 chars. 
+ .Cases(".eh_frame", ".eh_fram", eSectionTypeEHFrame) .Case(".gosymtab", eSectionTypeGoSymtab) .Default(eSectionTypeInvalid); if (section_type != eSectionTypeInvalid) diff --git a/lldb/test/Shell/ObjectFile/PECOFF/section-types.yaml b/lldb/test/Shell/ObjectFile/PECOFF/section-types.yaml new file mode 100644 index 0000000000000..caf955500e09f --- /dev/null +++ b/lldb/test/Shell/ObjectFile/PECOFF/section-types.yaml @@ -0,0 +1,92 @@ +# RUN: yaml2obj %s > %t +# RUN: lldb-test object-file %t | FileCheck %s + +# CHECK-LABEL: Name: .text +# CHECK-NEXT: Type: code + +# CHECK-LABEL: Name: .eh_fram +# CHECK-NEXT: Type: eh-frame +--- !COFF +OptionalHeader: + AddressOfEntryPoint: 4096 + ImageBase: 4194304 + SectionAlignment: 4096 + FileAlignment: 512 + MajorOperatingSystemVersion: 6 + MinorOperatingSystemVersion: 0 + MajorImageVersion: 0 + MinorImageVersion: 0 + MajorSubsystemVersion: 6 + MinorSubsystemVersion: 0 + Subsystem: IMAGE_SUBSYSTEM_WINDOWS_CUI + DLLCharacteristics: [ IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE, IMAGE_DLL_CHARACTERISTICS_NX_COMPAT, IMAGE_DLL_CHARACTERISTICS_NO_SEH, IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE ] + SizeOfStackReserve: 1048576 + SizeOfStackCommit: 4096 + SizeOfHeapReserve: 1048576 + SizeOfHeapCommit: 4096 + ExportTable: + RelativeVirtualAddress: 0 + Size: 0 + ImportTable: + RelativeVirtualAddress: 0 + Size: 0 + ResourceTable: + RelativeVirtualAddress: 0 + Size: 0 + ExceptionTable: + RelativeVirtualAddress: 0 + Size: 0 + CertificateTable: + RelativeVirtualAddress: 0 + Size: 0 + BaseRelocationTable: + RelativeVirtualAddress: 12288 + Size: 12 + Debug: + RelativeVirtualAddress: 0 + Size: 0 + Architecture: + RelativeVirtualAddress: 0 + Size: 0 + GlobalPtr: + RelativeVirtualAddress: 0 + Size: 0 + TlsTable: + RelativeVirtualAddress: 0 + Size: 0 + LoadConfigTable: + RelativeVirtualAddress: 0 + Size: 0 + BoundImport: + RelativeVirtualAddress: 0 + Size: 0 + IAT: + RelativeVirtualAddress: 0 + Size: 0 + DelayImportDescriptor: + 
RelativeVirtualAddress: 0 + Size: 0 + ClrRuntimeHeader: + RelativeVirtualAddress: 0 + Size: 0 +header: + Machine: IMAGE_FILE_MACHINE_I386 + Characteristics: [ IMAGE_FILE_EXECUTABLE_IMAGE, IMAGE_FILE_32BIT_MACHINE ] +sections: + - Name: .text + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + VirtualAddress: 4096 + VirtualSize: 5 + SectionData: 5589E55DC3 + - Name: .eh_fram + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ] + VirtualAddress: 8192 + VirtualSize: 52 + SectionData: 1400000000000000017A5200017C0801000C040488010000180000001C000000001040000500000000410E088502420D05000000 + - Name: .reloc + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ] + VirtualAddress: 12288 + VirtualSize: 12 + SectionData: 002000000C00000020300000 +symbols: +... From f5c54f40327b1680bf7e55d358b43a92395ae669 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Wed, 16 Oct 2019 00:05:06 +0300 Subject: [PATCH 161/591] [LLDB] Always interpret arm instructions as thumb on windows Windows on ARM always uses thumb mode, and doesn't have most of the mechanisms that are used in e.g. ELF for distinguishing between arm and thumb. Differential Revision: https://reviews.llvm.org/D70796 --- lldb/source/Utility/ArchSpec.cpp | 3 + .../ObjectFile/PECOFF/disassemble-thumb.yaml | 92 +++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 lldb/test/Shell/ObjectFile/PECOFF/disassemble-thumb.yaml diff --git a/lldb/source/Utility/ArchSpec.cpp b/lldb/source/Utility/ArchSpec.cpp index 62d9d246255a1..38f6752b0348d 100644 --- a/lldb/source/Utility/ArchSpec.cpp +++ b/lldb/source/Utility/ArchSpec.cpp @@ -1443,6 +1443,9 @@ bool ArchSpec::IsAlwaysThumbInstructions() const { GetCore() == ArchSpec::Core::eCore_thumbv6m) { return true; } + // Windows on ARM is always thumb. 
+ if (GetTriple().isOSWindows()) + return true; } return false; } diff --git a/lldb/test/Shell/ObjectFile/PECOFF/disassemble-thumb.yaml b/lldb/test/Shell/ObjectFile/PECOFF/disassemble-thumb.yaml new file mode 100644 index 0000000000000..dec2357596918 --- /dev/null +++ b/lldb/test/Shell/ObjectFile/PECOFF/disassemble-thumb.yaml @@ -0,0 +1,92 @@ +# RUN: yaml2obj %s > %t.exe +# RUN: %lldb %t.exe -o "disassemble -b -n entry" -b | FileCheck %s + +# CHECK: {{.*}}.exe[0x401000] <+0>: 0x0040 lsls r0, r0, #0x1 +# CHECK: {{.*}}.exe[0x401002] <+2>: 0x4770 bx lr + +--- !COFF +OptionalHeader: + AddressOfEntryPoint: 4097 + ImageBase: 4194304 + SectionAlignment: 4096 + FileAlignment: 512 + MajorOperatingSystemVersion: 6 + MinorOperatingSystemVersion: 0 + MajorImageVersion: 0 + MinorImageVersion: 0 + MajorSubsystemVersion: 6 + MinorSubsystemVersion: 0 + Subsystem: IMAGE_SUBSYSTEM_WINDOWS_CUI + DLLCharacteristics: [ IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE, IMAGE_DLL_CHARACTERISTICS_NX_COMPAT, IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE ] + SizeOfStackReserve: 1048576 + SizeOfStackCommit: 4096 + SizeOfHeapReserve: 1048576 + SizeOfHeapCommit: 4096 + ExportTable: + RelativeVirtualAddress: 0 + Size: 0 + ImportTable: + RelativeVirtualAddress: 0 + Size: 0 + ResourceTable: + RelativeVirtualAddress: 0 + Size: 0 + ExceptionTable: + RelativeVirtualAddress: 0 + Size: 0 + CertificateTable: + RelativeVirtualAddress: 0 + Size: 0 + BaseRelocationTable: + RelativeVirtualAddress: 0 + Size: 0 + Debug: + RelativeVirtualAddress: 0 + Size: 0 + Architecture: + RelativeVirtualAddress: 0 + Size: 0 + GlobalPtr: + RelativeVirtualAddress: 0 + Size: 0 + TlsTable: + RelativeVirtualAddress: 0 + Size: 0 + LoadConfigTable: + RelativeVirtualAddress: 0 + Size: 0 + BoundImport: + RelativeVirtualAddress: 0 + Size: 0 + IAT: + RelativeVirtualAddress: 0 + Size: 0 + DelayImportDescriptor: + RelativeVirtualAddress: 0 + Size: 0 + ClrRuntimeHeader: + RelativeVirtualAddress: 0 + Size: 0 +header: + Machine: 
IMAGE_FILE_MACHINE_ARMNT + Characteristics: [ IMAGE_FILE_EXECUTABLE_IMAGE, IMAGE_FILE_32BIT_MACHINE ] +sections: + - Name: .text + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + VirtualAddress: 4096 + VirtualSize: 4 + SectionData: '40007047' +symbols: + - Name: .text + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + - Name: entry + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_FUNCTION + StorageClass: IMAGE_SYM_CLASS_EXTERNAL +... From 7f362f04a7812111dec7eb11279a53566e09cdfb Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Wed, 27 Nov 2019 13:45:04 +0300 Subject: [PATCH 162/591] [llvm-readelf] - Make GNU style dumping of invalid SHT_GNU_verdef be consistent with LLVM style. When we dump SHT_GNU_verdef section that has sh_link that references a non-existent section, llvm-readobj reports a warning and continues dump, but llvm-readelf fails with a error. This patch fixes the issue and opens road for futher follow-ups for improving the printGNUVersionSectionProlog(). Differential revision: https://reviews.llvm.org/D70776 --- .../llvm-readobj/elf-verdef-invalid.test | 10 ++--- llvm/tools/llvm-readobj/ELFDumper.cpp | 40 +++++++++++-------- 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/llvm/test/tools/llvm-readobj/elf-verdef-invalid.test b/llvm/test/tools/llvm-readobj/elf-verdef-invalid.test index 493537464a008..3a4de36983857 100644 --- a/llvm/test/tools/llvm-readobj/elf-verdef-invalid.test +++ b/llvm/test/tools/llvm-readobj/elf-verdef-invalid.test @@ -3,14 +3,14 @@ ## Check that we report a warning when sh_link references a non-existent section. 
# RUN: yaml2obj %s --docnum=1 -o %t1 -# RUN: llvm-readobj -V %t1 2>&1 | FileCheck %s --check-prefix=INVALID-LINK-LLVM -DFILE=%t1 -# RUN: not llvm-readelf -V %t1 2>&1 | FileCheck %s --check-prefix=INVALID-LINK-GNU -DFILE=%t1 +# RUN: llvm-readobj -V %t1 2>&1 | FileCheck %s --check-prefix=INVALID-LINK-LLVM --implicit-check-not="warning:" -DFILE=%t1 +# RUN: llvm-readelf -V %t1 2>&1 | FileCheck %s --check-prefix=INVALID-LINK-GNU --implicit-check-not="warning:" -DFILE=%t1 # INVALID-LINK-LLVM: warning: '[[FILE]]': invalid section linked to SHT_GNU_verdef section with index 1: invalid section index: 255 -## TODO: llvm-readelf should also report a meaningful warning instead of an error. -# INVALID-LINK-GNU: Version definition -# INVALID-LINK-GNU: error: '[[FILE]]': invalid section index: 255 +# INVALID-LINK-GNU: Version definition section '.gnu.version_d' contains 0 entries: +# INVALID-LINK-GNU: warning: '[[FILE]]': invalid section linked to SHT_GNU_verdef section with index 1: invalid section index: 255 +# INVALID-LINK-GNU-NEXT: Addr: 0000000000000000 Offset: 0x000040 Link: 255 () --- !ELF FileHeader: diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index b7bd35e7c95cb..79d08d379a1ad 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -672,6 +672,9 @@ template class GNUStyle : public DumpStyle { bool checkPTDynamic(const Elf_Phdr &Phdr, const Elf_Shdr &Sec); void printProgramHeaders(const ELFO *Obj); void printSectionMapping(const ELFO *Obj); + void printGNUVersionSectionProlog(const ELFFile *Obj, + const typename ELFT::Shdr *Sec, + const Twine &Label, unsigned EntriesNum); }; template @@ -3921,18 +3924,26 @@ void GNUStyle::printDynamicRelocations(const ELFO *Obj) { } template -static void printGNUVersionSectionProlog(formatted_raw_ostream &OS, - const Twine &Name, unsigned EntriesNum, - const ELFFile *Obj, - const typename ELFT::Shdr *Sec, - StringRef FileName) { - StringRef SecName = 
unwrapOrError(FileName, Obj->getSectionName(Sec)); - OS << Name << " section '" << SecName << "' " +void GNUStyle::printGNUVersionSectionProlog( + const ELFFile *Obj, const typename ELFT::Shdr *Sec, + const Twine &Label, unsigned EntriesNum) { + StringRef SecName = unwrapOrError(this->FileName, Obj->getSectionName(Sec)); + OS << Label << " section '" << SecName << "' " << "contains " << EntriesNum << " entries:\n"; - const typename ELFT::Shdr *SymTab = - unwrapOrError(FileName, Obj->getSection(Sec->sh_link)); - StringRef SymTabName = unwrapOrError(FileName, Obj->getSectionName(SymTab)); + unsigned SecNdx = Sec - &cantFail(Obj->sections()).front(); + StringRef SymTabName = ""; + + Expected SymTabOrErr = + Obj->getSection(Sec->sh_link); + if (SymTabOrErr) + SymTabName = + unwrapOrError(this->FileName, Obj->getSectionName(*SymTabOrErr)); + else + this->reportUniqueWarning(createError( + "invalid section linked to SHT_GNU_verdef section with index " + + Twine(SecNdx) + ": " + toString(SymTabOrErr.takeError()))); + OS << " Addr: " << format_hex_no_prefix(Sec->sh_addr, 16) << " Offset: " << format_hex(Sec->sh_offset, 8) << " Link: " << Sec->sh_link << " (" << SymTabName << ")\n"; @@ -3945,8 +3956,7 @@ void GNUStyle::printVersionSymbolSection(const ELFFile *Obj, return; unsigned Entries = Sec->sh_size / sizeof(Elf_Versym); - printGNUVersionSectionProlog(OS, "Version symbols", Entries, Obj, Sec, - this->FileName); + printGNUVersionSectionProlog(Obj, Sec, "Version symbols", Entries); const uint8_t *VersymBuf = reinterpret_cast(Obj->base() + Sec->sh_offset); @@ -4017,8 +4027,7 @@ void GNUStyle::printVersionDefinitionSection(const ELFFile *Obj, if (!Sec) return; - printGNUVersionSectionProlog(OS, "Version definition", Sec->sh_info, Obj, Sec, - this->FileName); + printGNUVersionSectionProlog(Obj, Sec, "Version definition", Sec->sh_info); Expected> V = this->dumper()->getVersionDefinitions(Sec); if (!V) { @@ -4047,8 +4056,7 @@ void GNUStyle::printVersionDependencySection(const 
ELFFile *Obj, return; unsigned VerneedNum = Sec->sh_info; - printGNUVersionSectionProlog(OS, "Version needs", VerneedNum, Obj, Sec, - this->FileName); + printGNUVersionSectionProlog(Obj, Sec, "Version needs", VerneedNum); ArrayRef SecData = unwrapOrError(this->FileName, Obj->getSectionContents(Sec)); From f7e31e0cfd3b467a21c2ac9a94f5c828f88a9b72 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Thu, 28 Nov 2019 10:33:36 +0100 Subject: [PATCH 163/591] [lldb][NFC] Split up DWARFASTParserClang::CompleteTypeFromDWARF Moving the different parts into their own functions without any additional cleanup/refactoring, so this is NFC. --- .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 460 +++++++++--------- .../SymbolFile/DWARF/DWARFASTParserClang.h | 5 + 2 files changed, 243 insertions(+), 222 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 51fa90322cf07..ba17469ea998f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -1969,267 +1969,283 @@ bool DWARFASTParserClang::ParseTemplateParameterInfos( return template_param_infos.args.size() == template_param_infos.names.size(); } -bool DWARFASTParserClang::CompleteTypeFromDWARF(const DWARFDIE &die, - lldb_private::Type *type, - CompilerType &clang_type) { - SymbolFileDWARF *dwarf = die.GetDWARF(); - - std::lock_guard guard( - dwarf->GetObjectFile()->GetModule()->GetMutex()); - - // Disable external storage for this type so we don't get anymore - // clang::ExternalASTSource queries for this type. 
- m_ast.SetHasExternalStorage(clang_type.GetOpaqueQualType(), false); - - if (!die) - return false; - +bool DWARFASTParserClang::CompleteRecordType(const DWARFDIE &die, + lldb_private::Type *type, + CompilerType &clang_type) { const dw_tag_t tag = die.Tag(); - + SymbolFileDWARF *dwarf = die.GetDWARF(); Log *log = nullptr; // (LogChannelDWARF::GetLogIfAny(DWARF_LOG_DEBUG_INFO|DWARF_LOG_TYPE_COMPLETION)); - if (log) - dwarf->GetObjectFile()->GetModule()->LogMessageVerboseBacktrace( - log, "0x%8.8" PRIx64 ": %s '%s' resolving forward declaration...", - die.GetID(), die.GetTagAsCString(), type->GetName().AsCString()); - assert(clang_type); - DWARFAttributes attributes; - switch (tag) { - case DW_TAG_structure_type: - case DW_TAG_union_type: - case DW_TAG_class_type: { - ClangASTImporter::LayoutInfo layout_info; - - { - if (die.HasChildren()) { - LanguageType class_language = eLanguageTypeUnknown; - if (ClangASTContext::IsObjCObjectOrInterfaceType(clang_type)) { - class_language = eLanguageTypeObjC; - // For objective C we don't start the definition when the class is - // created. 
- ClangASTContext::StartTagDeclarationDefinition(clang_type); - } - int tag_decl_kind = -1; - AccessType default_accessibility = eAccessNone; - if (tag == DW_TAG_structure_type) { - tag_decl_kind = clang::TTK_Struct; - default_accessibility = eAccessPublic; - } else if (tag == DW_TAG_union_type) { - tag_decl_kind = clang::TTK_Union; - default_accessibility = eAccessPublic; - } else if (tag == DW_TAG_class_type) { - tag_decl_kind = clang::TTK_Class; - default_accessibility = eAccessPrivate; - } + ClangASTImporter::LayoutInfo layout_info; - std::vector> bases; - std::vector member_accessibilities; - bool is_a_class = false; - // Parse members and base classes first - std::vector member_function_dies; + { + if (die.HasChildren()) { + LanguageType class_language = eLanguageTypeUnknown; + if (ClangASTContext::IsObjCObjectOrInterfaceType(clang_type)) { + class_language = eLanguageTypeObjC; + // For objective C we don't start the definition when the class is + // created. + ClangASTContext::StartTagDeclarationDefinition(clang_type); + } - DelayedPropertyList delayed_properties; - ParseChildMembers(die, clang_type, class_language, bases, - member_accessibilities, member_function_dies, - delayed_properties, default_accessibility, is_a_class, - layout_info); + int tag_decl_kind = -1; + AccessType default_accessibility = eAccessNone; + if (tag == DW_TAG_structure_type) { + tag_decl_kind = clang::TTK_Struct; + default_accessibility = eAccessPublic; + } else if (tag == DW_TAG_union_type) { + tag_decl_kind = clang::TTK_Union; + default_accessibility = eAccessPublic; + } else if (tag == DW_TAG_class_type) { + tag_decl_kind = clang::TTK_Class; + default_accessibility = eAccessPrivate; + } - // Now parse any methods if there were any... 
- for (const DWARFDIE &die : member_function_dies) - dwarf->ResolveType(die); + std::vector> bases; + std::vector member_accessibilities; + bool is_a_class = false; + // Parse members and base classes first + std::vector member_function_dies; - if (class_language == eLanguageTypeObjC) { - ConstString class_name(clang_type.GetTypeName()); - if (class_name) { - DIEArray method_die_offsets; - dwarf->GetObjCMethodDIEOffsets(class_name, method_die_offsets); + DelayedPropertyList delayed_properties; + ParseChildMembers(die, clang_type, class_language, bases, + member_accessibilities, member_function_dies, + delayed_properties, default_accessibility, is_a_class, + layout_info); - if (!method_die_offsets.empty()) { - DWARFDebugInfo *debug_info = dwarf->DebugInfo(); + // Now parse any methods if there were any... + for (const DWARFDIE &die : member_function_dies) + dwarf->ResolveType(die); - const size_t num_matches = method_die_offsets.size(); - for (size_t i = 0; i < num_matches; ++i) { - const DIERef &die_ref = method_die_offsets[i]; - DWARFDIE method_die = debug_info->GetDIE(die_ref); + if (class_language == eLanguageTypeObjC) { + ConstString class_name(clang_type.GetTypeName()); + if (class_name) { + DIEArray method_die_offsets; + dwarf->GetObjCMethodDIEOffsets(class_name, method_die_offsets); - if (method_die) - method_die.ResolveType(); - } - } + if (!method_die_offsets.empty()) { + DWARFDebugInfo *debug_info = dwarf->DebugInfo(); + + const size_t num_matches = method_die_offsets.size(); + for (size_t i = 0; i < num_matches; ++i) { + const DIERef &die_ref = method_die_offsets[i]; + DWARFDIE method_die = debug_info->GetDIE(die_ref); - for (DelayedPropertyList::iterator pi = delayed_properties.begin(), - pe = delayed_properties.end(); - pi != pe; ++pi) - pi->Finalize(); + if (method_die) + method_die.ResolveType(); + } } - } - // If we have a DW_TAG_structure_type instead of a DW_TAG_class_type we - // need to tell the clang type it is actually a class. 
- if (class_language != eLanguageTypeObjC) { - if (is_a_class && tag_decl_kind != clang::TTK_Class) - m_ast.SetTagTypeKind(ClangUtil::GetQualType(clang_type), - clang::TTK_Class); + for (DelayedPropertyList::iterator pi = delayed_properties.begin(), + pe = delayed_properties.end(); + pi != pe; ++pi) + pi->Finalize(); } + } - // Since DW_TAG_structure_type gets used for both classes and - // structures, we may need to set any DW_TAG_member fields to have a - // "private" access if none was specified. When we parsed the child - // members we tracked that actual accessibility value for each - // DW_TAG_member in the "member_accessibilities" array. If the value - // for the member is zero, then it was set to the - // "default_accessibility" which for structs was "public". Below we - // correct this by setting any fields to "private" that weren't - // correctly set. - if (is_a_class && !member_accessibilities.empty()) { - // This is a class and all members that didn't have their access - // specified are private. - m_ast.SetDefaultAccessForRecordFields( - m_ast.GetAsRecordDecl(clang_type), eAccessPrivate, - &member_accessibilities.front(), member_accessibilities.size()); - } + // If we have a DW_TAG_structure_type instead of a DW_TAG_class_type we + // need to tell the clang type it is actually a class. + if (class_language != eLanguageTypeObjC) { + if (is_a_class && tag_decl_kind != clang::TTK_Class) + m_ast.SetTagTypeKind(ClangUtil::GetQualType(clang_type), + clang::TTK_Class); + } - if (!bases.empty()) { - // Make sure all base classes refer to complete types and not forward - // declarations. 
If we don't do this, clang will crash with an - // assertion in the call to clang_type.TransferBaseClasses() - for (const auto &base_class : bases) { - clang::TypeSourceInfo *type_source_info = - base_class->getTypeSourceInfo(); - if (type_source_info) { - CompilerType base_class_type( - &m_ast, type_source_info->getType().getAsOpaquePtr()); - if (!base_class_type.GetCompleteType()) { - auto module = dwarf->GetObjectFile()->GetModule(); - module->ReportError(":: Class '%s' has a base class '%s' which " - "does not have a complete definition.", - die.GetName(), - base_class_type.GetTypeName().GetCString()); - if (die.GetCU()->GetProducer() == eProducerClang) - module->ReportError(":: Try compiling the source file with " - "-fstandalone-debug."); - - // We have no choice other than to pretend that the base class - // is complete. If we don't do this, clang will crash when we - // call setBases() inside of - // "clang_type.TransferBaseClasses()" below. Since we - // provide layout assistance, all ivars in this class and other - // classes will be fine, this is the best we can do short of - // crashing. - if (ClangASTContext::StartTagDeclarationDefinition( - base_class_type)) { - ClangASTContext::CompleteTagDeclarationDefinition( - base_class_type); - } + // Since DW_TAG_structure_type gets used for both classes and + // structures, we may need to set any DW_TAG_member fields to have a + // "private" access if none was specified. When we parsed the child + // members we tracked that actual accessibility value for each + // DW_TAG_member in the "member_accessibilities" array. If the value + // for the member is zero, then it was set to the + // "default_accessibility" which for structs was "public". Below we + // correct this by setting any fields to "private" that weren't + // correctly set. + if (is_a_class && !member_accessibilities.empty()) { + // This is a class and all members that didn't have their access + // specified are private. 
+ m_ast.SetDefaultAccessForRecordFields( + m_ast.GetAsRecordDecl(clang_type), eAccessPrivate, + &member_accessibilities.front(), member_accessibilities.size()); + } + + if (!bases.empty()) { + // Make sure all base classes refer to complete types and not forward + // declarations. If we don't do this, clang will crash with an + // assertion in the call to clang_type.TransferBaseClasses() + for (const auto &base_class : bases) { + clang::TypeSourceInfo *type_source_info = + base_class->getTypeSourceInfo(); + if (type_source_info) { + CompilerType base_class_type( + &m_ast, type_source_info->getType().getAsOpaquePtr()); + if (!base_class_type.GetCompleteType()) { + auto module = dwarf->GetObjectFile()->GetModule(); + module->ReportError(":: Class '%s' has a base class '%s' which " + "does not have a complete definition.", + die.GetName(), + base_class_type.GetTypeName().GetCString()); + if (die.GetCU()->GetProducer() == eProducerClang) + module->ReportError(":: Try compiling the source file with " + "-fstandalone-debug."); + + // We have no choice other than to pretend that the base class + // is complete. If we don't do this, clang will crash when we + // call setBases() inside of + // "clang_type.TransferBaseClasses()" below. Since we + // provide layout assistance, all ivars in this class and other + // classes will be fine, this is the best we can do short of + // crashing. 
+ if (ClangASTContext::StartTagDeclarationDefinition( + base_class_type)) { + ClangASTContext::CompleteTagDeclarationDefinition( + base_class_type); } } } - - m_ast.TransferBaseClasses(clang_type.GetOpaqueQualType(), - std::move(bases)); } + + m_ast.TransferBaseClasses(clang_type.GetOpaqueQualType(), + std::move(bases)); } } + } - m_ast.AddMethodOverridesForCXXRecordType(clang_type.GetOpaqueQualType()); - ClangASTContext::BuildIndirectFields(clang_type); - ClangASTContext::CompleteTagDeclarationDefinition(clang_type); + m_ast.AddMethodOverridesForCXXRecordType(clang_type.GetOpaqueQualType()); + ClangASTContext::BuildIndirectFields(clang_type); + ClangASTContext::CompleteTagDeclarationDefinition(clang_type); - if (!layout_info.field_offsets.empty() || - !layout_info.base_offsets.empty() || - !layout_info.vbase_offsets.empty()) { - if (type) - layout_info.bit_size = type->GetByteSize().getValueOr(0) * 8; - if (layout_info.bit_size == 0) - layout_info.bit_size = - die.GetAttributeValueAsUnsigned(DW_AT_byte_size, 0) * 8; - - clang::CXXRecordDecl *record_decl = - m_ast.GetAsCXXRecordDecl(clang_type.GetOpaqueQualType()); - if (record_decl) { - if (log) { - ModuleSP module_sp = dwarf->GetObjectFile()->GetModule(); + if (!layout_info.field_offsets.empty() || !layout_info.base_offsets.empty() || + !layout_info.vbase_offsets.empty()) { + if (type) + layout_info.bit_size = type->GetByteSize().getValueOr(0) * 8; + if (layout_info.bit_size == 0) + layout_info.bit_size = + die.GetAttributeValueAsUnsigned(DW_AT_byte_size, 0) * 8; - if (module_sp) { - module_sp->LogMessage( - log, - "ClangASTContext::CompleteTypeFromDWARF (clang_type = %p) " - "caching layout info for record_decl = %p, bit_size = %" PRIu64 - ", alignment = %" PRIu64 - ", field_offsets[%u], base_offsets[%u], vbase_offsets[%u])", - static_cast(clang_type.GetOpaqueQualType()), - static_cast(record_decl), layout_info.bit_size, - layout_info.alignment, - static_cast(layout_info.field_offsets.size()), - 
static_cast(layout_info.base_offsets.size()), - static_cast(layout_info.vbase_offsets.size())); - - uint32_t idx; - { - llvm::DenseMap::const_iterator - pos, - end = layout_info.field_offsets.end(); - for (idx = 0, pos = layout_info.field_offsets.begin(); pos != end; - ++pos, ++idx) { - module_sp->LogMessage( - log, "ClangASTContext::CompleteTypeFromDWARF (clang_type = " - "%p) field[%u] = { bit_offset=%u, name='%s' }", - static_cast(clang_type.GetOpaqueQualType()), idx, - static_cast(pos->second), - pos->first->getNameAsString().c_str()); - } + clang::CXXRecordDecl *record_decl = + m_ast.GetAsCXXRecordDecl(clang_type.GetOpaqueQualType()); + if (record_decl) { + if (log) { + ModuleSP module_sp = dwarf->GetObjectFile()->GetModule(); + + if (module_sp) { + module_sp->LogMessage( + log, + "ClangASTContext::CompleteTypeFromDWARF (clang_type = %p) " + "caching layout info for record_decl = %p, bit_size = %" PRIu64 + ", alignment = %" PRIu64 + ", field_offsets[%u], base_offsets[%u], vbase_offsets[%u])", + static_cast(clang_type.GetOpaqueQualType()), + static_cast(record_decl), layout_info.bit_size, + layout_info.alignment, + static_cast(layout_info.field_offsets.size()), + static_cast(layout_info.base_offsets.size()), + static_cast(layout_info.vbase_offsets.size())); + + uint32_t idx; + { + llvm::DenseMap::const_iterator + pos, + end = layout_info.field_offsets.end(); + for (idx = 0, pos = layout_info.field_offsets.begin(); pos != end; + ++pos, ++idx) { + module_sp->LogMessage( + log, + "ClangASTContext::CompleteTypeFromDWARF (clang_type = " + "%p) field[%u] = { bit_offset=%u, name='%s' }", + static_cast(clang_type.GetOpaqueQualType()), idx, + static_cast(pos->second), + pos->first->getNameAsString().c_str()); } + } - { - llvm::DenseMap::const_iterator base_pos, - base_end = layout_info.base_offsets.end(); - for (idx = 0, base_pos = layout_info.base_offsets.begin(); - base_pos != base_end; ++base_pos, ++idx) { - module_sp->LogMessage( - log, 
"ClangASTContext::CompleteTypeFromDWARF (clang_type = " - "%p) base[%u] = { byte_offset=%u, name='%s' }", - clang_type.GetOpaqueQualType(), idx, - (uint32_t)base_pos->second.getQuantity(), - base_pos->first->getNameAsString().c_str()); - } + { + llvm::DenseMap::const_iterator base_pos, + base_end = layout_info.base_offsets.end(); + for (idx = 0, base_pos = layout_info.base_offsets.begin(); + base_pos != base_end; ++base_pos, ++idx) { + module_sp->LogMessage( + log, + "ClangASTContext::CompleteTypeFromDWARF (clang_type = " + "%p) base[%u] = { byte_offset=%u, name='%s' }", + clang_type.GetOpaqueQualType(), idx, + (uint32_t)base_pos->second.getQuantity(), + base_pos->first->getNameAsString().c_str()); } - { - llvm::DenseMap::const_iterator vbase_pos, - vbase_end = layout_info.vbase_offsets.end(); - for (idx = 0, vbase_pos = layout_info.vbase_offsets.begin(); - vbase_pos != vbase_end; ++vbase_pos, ++idx) { - module_sp->LogMessage( - log, "ClangASTContext::CompleteTypeFromDWARF (clang_type = " - "%p) vbase[%u] = { byte_offset=%u, name='%s' }", - static_cast(clang_type.GetOpaqueQualType()), idx, - static_cast(vbase_pos->second.getQuantity()), - vbase_pos->first->getNameAsString().c_str()); - } + } + { + llvm::DenseMap::const_iterator vbase_pos, + vbase_end = layout_info.vbase_offsets.end(); + for (idx = 0, vbase_pos = layout_info.vbase_offsets.begin(); + vbase_pos != vbase_end; ++vbase_pos, ++idx) { + module_sp->LogMessage( + log, + "ClangASTContext::CompleteTypeFromDWARF (clang_type = " + "%p) vbase[%u] = { byte_offset=%u, name='%s' }", + static_cast(clang_type.GetOpaqueQualType()), idx, + static_cast(vbase_pos->second.getQuantity()), + vbase_pos->first->getNameAsString().c_str()); } } } - GetClangASTImporter().InsertRecordDecl(record_decl, layout_info); } + GetClangASTImporter().InsertRecordDecl(record_decl, layout_info); } } - return (bool)clang_type; + return (bool)clang_type; +} - case DW_TAG_enumeration_type: - if 
(ClangASTContext::StartTagDeclarationDefinition(clang_type)) { - if (die.HasChildren()) { - bool is_signed = false; - clang_type.IsIntegerType(is_signed); - ParseChildEnumerators(clang_type, is_signed, - type->GetByteSize().getValueOr(0), die); - } - ClangASTContext::CompleteTagDeclarationDefinition(clang_type); +bool DWARFASTParserClang::CompleteEnumType(const DWARFDIE &die, + lldb_private::Type *type, + CompilerType &clang_type) { + if (ClangASTContext::StartTagDeclarationDefinition(clang_type)) { + if (die.HasChildren()) { + bool is_signed = false; + clang_type.IsIntegerType(is_signed); + ParseChildEnumerators(clang_type, is_signed, + type->GetByteSize().getValueOr(0), die); } - return (bool)clang_type; + ClangASTContext::CompleteTagDeclarationDefinition(clang_type); + } + return (bool)clang_type; +} +bool DWARFASTParserClang::CompleteTypeFromDWARF(const DWARFDIE &die, + lldb_private::Type *type, + CompilerType &clang_type) { + SymbolFileDWARF *dwarf = die.GetDWARF(); + + std::lock_guard guard( + dwarf->GetObjectFile()->GetModule()->GetMutex()); + + // Disable external storage for this type so we don't get anymore + // clang::ExternalASTSource queries for this type. 
+ m_ast.SetHasExternalStorage(clang_type.GetOpaqueQualType(), false); + + if (!die) + return false; + + const dw_tag_t tag = die.Tag(); + + Log *log = + nullptr; // (LogChannelDWARF::GetLogIfAny(DWARF_LOG_DEBUG_INFO|DWARF_LOG_TYPE_COMPLETION)); + if (log) + dwarf->GetObjectFile()->GetModule()->LogMessageVerboseBacktrace( + log, "0x%8.8" PRIx64 ": %s '%s' resolving forward declaration...", + die.GetID(), die.GetTagAsCString(), type->GetName().AsCString()); + assert(clang_type); + DWARFAttributes attributes; + switch (tag) { + case DW_TAG_structure_type: + case DW_TAG_union_type: + case DW_TAG_class_type: + return CompleteRecordType(die, type, clang_type); + case DW_TAG_enumeration_type: + return CompleteEnumType(die, type, clang_type); default: assert(false && "not a forward clang type decl!"); break; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index a8963bbbca1bf..31d76c1852123 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -170,6 +170,11 @@ class DWARFASTParserClang : public DWARFASTParser { lldb::ModuleSP GetModuleForType(const DWARFDIE &die); private: + bool CompleteRecordType(const DWARFDIE &die, lldb_private::Type *type, + lldb_private::CompilerType &clang_type); + bool CompleteEnumType(const DWARFDIE &die, lldb_private::Type *type, + lldb_private::CompilerType &clang_type); + lldb::TypeSP ParseTypeModifier(const lldb_private::SymbolContext &sc, const DWARFDIE &die, ParsedDWARFTypeAttributes &attrs); From 943d8326dd35f5f9bc79c2eb6463fb3b5ed887ff Mon Sep 17 00:00:00 2001 From: David Stuttard Date: Thu, 21 Nov 2019 11:31:41 +0000 Subject: [PATCH 164/591] AMDGPU: Fix lit test checks with dag option Summary: I was seeing some failures on a test with slightly different instruction ordering. Adding in some DAG directives solved the issue. 
Change-Id: If5a3d3969055fb19279943bd45161bb70a3dabce Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, tpr, t-tye, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70531 --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll | 26 ++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll index 7b34d873f7a74..25742666a5794 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll @@ -1297,8 +1297,30 @@ bb: ; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_vecarg: ; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0 -; GCN-COUNT-8: global_load_dwordx4 -; GCN-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: global_load_dwordx4 +; GCN-DAG: global_load_dwordx4 +; GCN-DAG: global_load_dwordx4 +; GCN-DAG: global_load_dwordx4 +; GCN-DAG: global_load_dwordx4 +; GCN-DAG: global_load_dwordx4 +; GCN-DAG: global_load_dwordx4 +; GCN-DAG: global_load_dwordx4 +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}} +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 
v{{[0-9]+}} ; GCN: v_mfma_f32_32x32x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-COUNT-32: v_accvgpr_read_b32 ; GCN-COUNT-8: global_store_dwordx4 From f286f2dda4d2b05b61964f5c9373f36493fbb190 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Thu, 28 Nov 2019 13:18:15 +0200 Subject: [PATCH 165/591] [LLDB] [test] Add a missing "REQUIRES: arm" line --- lldb/test/Shell/ObjectFile/PECOFF/disassemble-thumb.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lldb/test/Shell/ObjectFile/PECOFF/disassemble-thumb.yaml b/lldb/test/Shell/ObjectFile/PECOFF/disassemble-thumb.yaml index dec2357596918..5515824e776bc 100644 --- a/lldb/test/Shell/ObjectFile/PECOFF/disassemble-thumb.yaml +++ b/lldb/test/Shell/ObjectFile/PECOFF/disassemble-thumb.yaml @@ -1,3 +1,5 @@ +# REQUIRES: arm + # RUN: yaml2obj %s > %t.exe # RUN: %lldb %t.exe -o "disassemble -b -n entry" -b | FileCheck %s From 9d2679152a4bbe892f72802427657bfdca85a63b Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Thu, 28 Nov 2019 12:24:08 +0100 Subject: [PATCH 166/591] [lldb][NFC] Make GetAsCXXRecordDecl static All other casting functions there are static, so this should be too. 
--- lldb/include/lldb/Symbol/ClangASTContext.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lldb/include/lldb/Symbol/ClangASTContext.h b/lldb/include/lldb/Symbol/ClangASTContext.h index 20421bca305ed..7018f3b71b4f4 100644 --- a/lldb/include/lldb/Symbol/ClangASTContext.h +++ b/lldb/include/lldb/Symbol/ClangASTContext.h @@ -908,7 +908,8 @@ class ClangASTContext : public TypeSystem { static clang::TypedefNameDecl *GetAsTypedefDecl(const CompilerType &type); - clang::CXXRecordDecl *GetAsCXXRecordDecl(lldb::opaque_compiler_type_t type); + static clang::CXXRecordDecl * + GetAsCXXRecordDecl(lldb::opaque_compiler_type_t type); static clang::ObjCInterfaceDecl * GetAsObjCInterfaceDecl(const CompilerType &type); From ee79feaec3ed44b21654936baf44561f5f726dfc Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Thu, 28 Nov 2019 12:45:47 +0100 Subject: [PATCH 167/591] [lldb][NFC] Remove forward declaration of PrivateAutoCompleteMembers That's declared directly above the actual definition, so it serves no use. 
--- lldb/source/Symbol/Variable.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/lldb/source/Symbol/Variable.cpp b/lldb/source/Symbol/Variable.cpp index a2eeaa1d2a5b2..6e4b87c47700c 100644 --- a/lldb/source/Symbol/Variable.cpp +++ b/lldb/source/Symbol/Variable.cpp @@ -487,13 +487,6 @@ static void PrivateAutoComplete( &prefix_path, // Anything that has been resolved already will be in here const CompilerType &compiler_type, CompletionRequest &request); -static void PrivateAutoCompleteMembers( - StackFrame *frame, const std::string &partial_member_name, - llvm::StringRef partial_path, - const llvm::Twine - &prefix_path, // Anything that has been resolved already will be in here - const CompilerType &compiler_type, CompletionRequest &request); - static void PrivateAutoCompleteMembers( StackFrame *frame, const std::string &partial_member_name, llvm::StringRef partial_path, From 08cce03a6d959c899e07398603c85168a96f549c Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Thu, 28 Nov 2019 11:39:48 +0100 Subject: [PATCH 168/591] [clangd] Tweak the no-index error message for rename, NFC. Summary: The current error message doesn't fit well for cross-file rename. 
Reviewers: ilya-biryukov Subscribers: MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D70809 --- clang-tools-extra/clangd/refactor/Rename.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/refactor/Rename.cpp b/clang-tools-extra/clangd/refactor/Rename.cpp index ab121d434c9c3..f775539cb63dd 100644 --- a/clang-tools-extra/clangd/refactor/Rename.cpp +++ b/clang-tools-extra/clangd/refactor/Rename.cpp @@ -186,7 +186,7 @@ llvm::Error makeError(ReasonToReject Reason) { case ReasonToReject::NoSymbolFound: return "there is no symbol at the given location"; case ReasonToReject::NoIndexProvided: - return "symbol may be used in other files (no index available)"; + return "no index provided"; case ReasonToReject::UsedOutsideFile: return "the symbol is used outside main file"; case ReasonToReject::NonIndexable: From 2330cee82f0aa06e8063189fe7a68db3e51f3054 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Thu, 28 Nov 2019 11:24:04 +0100 Subject: [PATCH 169/591] [clangd] Prefer the left character if the character on the right of the cursor is semicolon. 
Summary: This would make go-to-def works on the cases like int A = abc^; Reviewers: sammccall Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D70807 --- clang-tools-extra/clangd/Selection.cpp | 5 +++-- clang-tools-extra/clangd/unittests/SelectionTests.cpp | 2 +- .../clangd/unittests/SemanticSelectionTests.cpp | 5 +---- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/clang-tools-extra/clangd/Selection.cpp b/clang-tools-extra/clangd/Selection.cpp index c91cd24e2f25f..5b29b916b33ce 100644 --- a/clang-tools-extra/clangd/Selection.cpp +++ b/clang-tools-extra/clangd/Selection.cpp @@ -513,8 +513,9 @@ static std::pair pointBounds(unsigned Offset, FileID FID, return {Offset - 1, Offset}; // We could choose either this byte or the previous. Usually we prefer the // character on the right of the cursor (or under a block cursor). - // But if that's whitespace, we likely want the token on the left. - if (isWhitespace(Buf[Offset]) && !isWhitespace(Buf[Offset - 1])) + // But if that's whitespace/semicolon, we likely want the token on the left. + auto IsIgnoredChar = [](char C) { return isWhitespace(C) || C == ';'; }; + if (IsIgnoredChar(Buf[Offset]) && !IsIgnoredChar(Buf[Offset - 1])) return {Offset - 1, Offset}; return {Offset, Offset + 1}; } diff --git a/clang-tools-extra/clangd/unittests/SelectionTests.cpp b/clang-tools-extra/clangd/unittests/SelectionTests.cpp index 2803aaaca1c57..6f4ccd88b978e 100644 --- a/clang-tools-extra/clangd/unittests/SelectionTests.cpp +++ b/clang-tools-extra/clangd/unittests/SelectionTests.cpp @@ -234,6 +234,7 @@ TEST(SelectionTest, CommonAncestor) { {"void foo() { [[foo^()]]; }", "CallExpr"}, {"void foo() { [[foo^]] (); }", "DeclRefExpr"}, {"int bar; void foo() [[{ foo (); }]]^", "CompoundStmt"}, + {"int x = [[42]]^;", "IntegerLiteral"}, // Ignores whitespace, comments, and semicolons in the selection. 
{"void foo() { [[foo^()]]; /*comment*/^}", "CallExpr"}, @@ -271,7 +272,6 @@ TEST(SelectionTest, CommonAncestor) { // FIXME: Ideally we'd get a declstmt or the VarDecl itself here. // This doesn't happen now; the RAV doesn't traverse a node containing ;. {"int x = 42;^", nullptr}, - {"int x = 42^;", nullptr}, // Common ancestor is logically TUDecl, but we never return that. {"^int x; int y;^", nullptr}, diff --git a/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp b/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp index b9ca0273a8233..f518fea672920 100644 --- a/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp +++ b/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp @@ -88,11 +88,8 @@ TEST(SemanticSelection, All) { R"cpp( // Single statement in TU. [[int v = [[1^00]]]]; )cpp", - // FIXME: No node found associated to the position. R"cpp( // Cursor at end of VarDecl. - void func() { - int v = 100 + 100^; - } + [[int v = [[100]]^]]; )cpp", // FIXME: No node found associated to the position. R"cpp( // Cursor in between spaces. 
From 2e3c040ee062741472233c1de2dbf135bcee5c7a Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Thu, 28 Nov 2019 13:27:25 +0100 Subject: [PATCH 170/591] [lldb][NFC] Remove unused CStringToDIEMap typedef --- lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h index d1b066ffe80cb..056cf33a202f1 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfo.h @@ -16,7 +16,6 @@ #include "DWARFTypeUnit.h" #include "DWARFUnit.h" #include "SymbolFileDWARF.h" -#include "lldb/Core/STLUtils.h" #include "lldb/lldb-private.h" #include "llvm/Support/Error.h" @@ -24,11 +23,6 @@ namespace lldb_private { class DWARFContext; } -typedef std::multimap - CStringToDIEMap; -typedef CStringToDIEMap::iterator CStringToDIEMapIter; -typedef CStringToDIEMap::const_iterator CStringToDIEMapConstIter; - class DWARFDebugInfo { public: typedef dw_offset_t (*Callback)(SymbolFileDWARF *dwarf2Data, From 3c3aca245e67fa70b6f49b9062983fbdf120ba04 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Thu, 28 Nov 2019 12:47:32 +0100 Subject: [PATCH 171/591] [clangd] Don't perform rename when the refs result from index is incomplete. Summary: Also do an early return if the number of affected files > limit to save some unnecessary FileURI computations. 
Reviewers: ilya-biryukov Subscribers: MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D70811 --- clang-tools-extra/clangd/refactor/Rename.cpp | 36 ++++++++++++------- .../clangd/unittests/RenameTests.cpp | 28 +++++++++++++++ 2 files changed, 51 insertions(+), 13 deletions(-) diff --git a/clang-tools-extra/clangd/refactor/Rename.cpp b/clang-tools-extra/clangd/refactor/Rename.cpp index f775539cb63dd..e57bf61dc2e5c 100644 --- a/clang-tools-extra/clangd/refactor/Rename.cpp +++ b/clang-tools-extra/clangd/refactor/Rename.cpp @@ -281,20 +281,37 @@ Range toRange(const SymbolLocation &L) { // Return all rename occurrences (per the index) outside of the main file, // grouped by the absolute file path. -llvm::StringMap> +llvm::Expected>> findOccurrencesOutsideFile(const NamedDecl &RenameDecl, llvm::StringRef MainFile, const SymbolIndex &Index) { RefsRequest RQuest; RQuest.IDs.insert(*getSymbolID(&RenameDecl)); - // Absolute file path => rename ocurrences in that file. + // Absolute file path => rename occurrences in that file. llvm::StringMap> AffectedFiles; - Index.refs(RQuest, [&](const Ref &R) { + // FIXME: make the limit customizable. 
+ static constexpr size_t MaxLimitFiles = 50; + bool HasMore = Index.refs(RQuest, [&](const Ref &R) { + if (AffectedFiles.size() > MaxLimitFiles) + return; if (auto RefFilePath = filePath(R.Location, /*HintFilePath=*/MainFile)) { if (*RefFilePath != MainFile) AffectedFiles[*RefFilePath].push_back(toRange(R.Location)); } }); + + if (AffectedFiles.size() > MaxLimitFiles) + return llvm::make_error( + llvm::formatv("The number of affected files exceeds the max limit {0}", + MaxLimitFiles), + llvm::inconvertibleErrorCode()); + if (HasMore) { + return llvm::make_error( + llvm::formatv("The symbol {0} has too many occurrences", + RenameDecl.getQualifiedNameAsString()), + llvm::inconvertibleErrorCode()); + } + return AffectedFiles; } @@ -321,17 +338,10 @@ llvm::Expected renameOutsideFile( llvm::function_ref(PathRef)> GetFileContent) { auto AffectedFiles = findOccurrencesOutsideFile(RenameDecl, MainFilePath, Index); - // FIXME: make the limit customizable. - static constexpr size_t MaxLimitFiles = 50; - if (AffectedFiles.size() >= MaxLimitFiles) - return llvm::make_error( - llvm::formatv( - "The number of affected files exceeds the max limit {0}: {1}", - MaxLimitFiles, AffectedFiles.size()), - llvm::inconvertibleErrorCode()); - + if (!AffectedFiles) + return AffectedFiles.takeError(); FileEdits Results; - for (auto &FileAndOccurrences : AffectedFiles) { + for (auto &FileAndOccurrences : *AffectedFiles) { llvm::StringRef FilePath = FileAndOccurrences.first(); auto AffectedFileCode = GetFileContent(FilePath); diff --git a/clang-tools-extra/clangd/unittests/RenameTests.cpp b/clang-tools-extra/clangd/unittests/RenameTests.cpp index 47aca380f3e9d..89efb32a2bb53 100644 --- a/clang-tools-extra/clangd/unittests/RenameTests.cpp +++ b/clang-tools-extra/clangd/unittests/RenameTests.cpp @@ -621,6 +621,34 @@ TEST(RenameTests, CrossFile) { UnorderedElementsAre( Pair(Eq(BarPath), Eq(expectedResult(BarCode, NewName))), Pair(Eq(MainFilePath), Eq(expectedResult(MainCode, NewName))))); + + // 
Run rename on a pagination index which couldn't return all refs in one + // request, we reject rename on this case. + class PaginationIndex : public SymbolIndex { + bool refs(const RefsRequest &Req, + llvm::function_ref Callback) const override { + return true; // has more references + } + + bool fuzzyFind( + const FuzzyFindRequest &Req, + llvm::function_ref Callback) const override { + return false; + } + void + lookup(const LookupRequest &Req, + llvm::function_ref Callback) const override {} + + void relations(const RelationsRequest &Req, + llvm::function_ref + Callback) const override {} + size_t estimateMemoryUsage() const override { return 0; } + } PIndex; + Results = rename({MainCode.point(), NewName, AST, MainFilePath, &PIndex, + /*CrossFile=*/true, GetDirtyBuffer}); + EXPECT_FALSE(Results); + EXPECT_THAT(llvm::toString(Results.takeError()), + testing::HasSubstr("too many occurrences")); } TEST(CrossFileRenameTests, CrossFileOnLocalSymbol) { From 373e2a4f69d623e59329ff801f261d8b299e12d2 Mon Sep 17 00:00:00 2001 From: Konrad Kleine Date: Wed, 27 Nov 2019 10:57:06 +0100 Subject: [PATCH 172/591] [lldb] NFC: refactor CompileUnit::ResolveSymbolContext Summary: I found the above named method hard to read because it had a) many nested blocks and b) one return statement at the end with some logic involved. I decided to refactor this function by employing an early exit strategy. In order to capture the logic in the return statement and to not have it repeated more than once I chose to implement a very small lamda function that captures all the variables it needs. This is a non-functional change (NFC). 
Reviewers: jdoerfert Subscribers: lldb-commits Tags: #lldb Differential Revision: https://reviews.llvm.org/D70774 --- lldb/include/lldb/Symbol/CompileUnit.h | 11 +- lldb/source/API/SBThread.cpp | 7 +- lldb/source/Core/AddressResolverFileLine.cpp | 7 +- lldb/source/Symbol/CompileUnit.cpp | 136 ++++++++----------- 4 files changed, 71 insertions(+), 90 deletions(-) diff --git a/lldb/include/lldb/Symbol/CompileUnit.h b/lldb/include/lldb/Symbol/CompileUnit.h index 7efbf792b1a92..b5f37f6789007 100644 --- a/lldb/include/lldb/Symbol/CompileUnit.h +++ b/lldb/include/lldb/Symbol/CompileUnit.h @@ -381,14 +381,11 @@ class CompileUnit : public std::enable_shared_from_this, /// A SymbolContext list class that will get any matching /// entries appended to. /// - /// \return - /// The number of new matches that were added to \a sc_list. - /// /// \see enum SymbolContext::Scope - uint32_t ResolveSymbolContext(const FileSpec &file_spec, uint32_t line, - bool check_inlines, bool exact, - lldb::SymbolContextItem resolve_scope, - SymbolContextList &sc_list); + void ResolveSymbolContext(const FileSpec &file_spec, uint32_t line, + bool check_inlines, bool exact, + lldb::SymbolContextItem resolve_scope, + SymbolContextList &sc_list); /// Get whether compiler optimizations were enabled for this compile unit /// diff --git a/lldb/source/API/SBThread.cpp b/lldb/source/API/SBThread.cpp index 8d4930bf6edb0..2dada9a6118db 100644 --- a/lldb/source/API/SBThread.cpp +++ b/lldb/source/API/SBThread.cpp @@ -914,9 +914,10 @@ SBError SBThread::StepOverUntil(lldb::SBFrame &sb_frame, const bool exact = false; SymbolContextList sc_list; - const uint32_t num_matches = frame_sc.comp_unit->ResolveSymbolContext( - step_file_spec, line, check_inlines, exact, eSymbolContextLineEntry, - sc_list); + frame_sc.comp_unit->ResolveSymbolContext(step_file_spec, line, + check_inlines, exact, + eSymbolContextLineEntry, sc_list); + const uint32_t num_matches = sc_list.GetSize(); if (num_matches > 0) { SymbolContext sc; 
for (uint32_t i = 0; i < num_matches; ++i) { diff --git a/lldb/source/Core/AddressResolverFileLine.cpp b/lldb/source/Core/AddressResolverFileLine.cpp index 4a14260c6c72f..4122b5d3b747d 100644 --- a/lldb/source/Core/AddressResolverFileLine.cpp +++ b/lldb/source/Core/AddressResolverFileLine.cpp @@ -40,14 +40,13 @@ Searcher::CallbackReturn AddressResolverFileLine::SearchCallback(SearchFilter &filter, SymbolContext &context, Address *addr) { SymbolContextList sc_list; - uint32_t sc_list_size; CompileUnit *cu = context.comp_unit; Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_BREAKPOINTS)); - sc_list_size = - cu->ResolveSymbolContext(m_file_spec, m_line_number, m_inlines, false, - eSymbolContextEverything, sc_list); + cu->ResolveSymbolContext(m_file_spec, m_line_number, m_inlines, false, + eSymbolContextEverything, sc_list); + uint32_t sc_list_size = sc_list.GetSize(); for (uint32_t i = 0; i < sc_list_size; i++) { SymbolContext sc; if (sc_list.GetContextAtIndex(i, sc)) { diff --git a/lldb/source/Symbol/CompileUnit.cpp b/lldb/source/Symbol/CompileUnit.cpp index b37636c3bafc1..62a1d690da42f 100644 --- a/lldb/source/Symbol/CompileUnit.cpp +++ b/lldb/source/Symbol/CompileUnit.cpp @@ -244,11 +244,11 @@ uint32_t CompileUnit::FindLineEntry(uint32_t start_idx, uint32_t line, return UINT32_MAX; } -uint32_t CompileUnit::ResolveSymbolContext(const FileSpec &file_spec, - uint32_t line, bool check_inlines, - bool exact, - SymbolContextItem resolve_scope, - SymbolContextList &sc_list) { +void CompileUnit::ResolveSymbolContext(const FileSpec &file_spec, + uint32_t line, bool check_inlines, + bool exact, + SymbolContextItem resolve_scope, + SymbolContextList &sc_list) { // First find all of the file indexes that match our "file_spec". 
If // "file_spec" has an empty directory, then only compare the basenames when // finding file indexes @@ -260,7 +260,7 @@ uint32_t CompileUnit::ResolveSymbolContext(const FileSpec &file_spec, // If we are not looking for inlined functions and our file spec doesn't // match then we are done... if (!file_spec_matches_cu_file_spec && !check_inlines) - return 0; + return; uint32_t file_idx = GetSupportFiles().FindFileIndex(1, file_spec, true); @@ -271,84 +271,68 @@ uint32_t CompileUnit::ResolveSymbolContext(const FileSpec &file_spec, const size_t num_file_indexes = file_indexes.size(); if (num_file_indexes == 0) - return 0; - - const uint32_t prev_size = sc_list.GetSize(); + return; SymbolContext sc(GetModule()); sc.comp_unit = this; - if (line != 0) { - LineTable *line_table = sc.comp_unit->GetLineTable(); - - if (line_table != nullptr) { - uint32_t found_line; - uint32_t line_idx; - - if (num_file_indexes == 1) { - // We only have a single support file that matches, so use the line - // table function that searches for a line entries that match a single - // support file index - LineEntry line_entry; - line_idx = line_table->FindLineEntryIndexByFileIndex( - 0, file_indexes.front(), line, exact, &line_entry); - - // If "exact == true", then "found_line" will be the same as "line". If - // "exact == false", the "found_line" will be the closest line entry - // with a line number greater than "line" and we will use this for our - // subsequent line exact matches below. - found_line = line_entry.line; - - while (line_idx != UINT32_MAX) { - // If they only asked for the line entry, then we're done, we can - // just copy that over. But if they wanted more than just the line - // number, fill it in. 
- if (resolve_scope == eSymbolContextLineEntry) { - sc.line_entry = line_entry; - } else { - line_entry.range.GetBaseAddress().CalculateSymbolContext( - &sc, resolve_scope); - } - - sc_list.Append(sc); - line_idx = line_table->FindLineEntryIndexByFileIndex( - line_idx + 1, file_indexes.front(), found_line, true, - &line_entry); - } - } else { - // We found multiple support files that match "file_spec" so use the - // line table function that searches for a line entries that match a - // multiple support file indexes. - LineEntry line_entry; - line_idx = line_table->FindLineEntryIndexByFileIndex( - 0, file_indexes, line, exact, &line_entry); - - // If "exact == true", then "found_line" will be the same as "line". If - // "exact == false", the "found_line" will be the closest line entry - // with a line number greater than "line" and we will use this for our - // subsequent line exact matches below. - found_line = line_entry.line; - - while (line_idx != UINT32_MAX) { - if (resolve_scope == eSymbolContextLineEntry) { - sc.line_entry = line_entry; - } else { - line_entry.range.GetBaseAddress().CalculateSymbolContext( - &sc, resolve_scope); - } - - sc_list.Append(sc); - line_idx = line_table->FindLineEntryIndexByFileIndex( - line_idx + 1, file_indexes, found_line, true, &line_entry); - } - } - } - } else if (file_spec_matches_cu_file_spec && !check_inlines) { + if (line == 0) + return; + + if (file_spec_matches_cu_file_spec && !check_inlines) { // only append the context if we aren't looking for inline call sites by // file and line and if the file spec matches that of the compile unit sc_list.Append(sc); + return; + } + + LineTable *line_table = sc.comp_unit->GetLineTable(); + + if (line_table == nullptr) + return; + + uint32_t line_idx; + LineEntry line_entry; + + if (num_file_indexes == 1) { + // We only have a single support file that matches, so use the line + // table function that searches for a line entries that match a single + // support file index + line_idx 
= line_table->FindLineEntryIndexByFileIndex( + 0, file_indexes.front(), line, exact, &line_entry); + } else { + // We found multiple support files that match "file_spec" so use the + // line table function that searches for a line entries that match a + // multiple support file indexes. + line_idx = line_table->FindLineEntryIndexByFileIndex(0, file_indexes, line, + exact, &line_entry); + } + + // If "exact == true", then "found_line" will be the same as "line". If + // "exact == false", the "found_line" will be the closest line entry + // with a line number greater than "line" and we will use this for our + // subsequent line exact matches below. + uint32_t found_line = line_entry.line; + + while (line_idx != UINT32_MAX) { + // If they only asked for the line entry, then we're done, we can + // just copy that over. But if they wanted more than just the line + // number, fill it in. + if (resolve_scope == eSymbolContextLineEntry) { + sc.line_entry = line_entry; + } else { + line_entry.range.GetBaseAddress().CalculateSymbolContext(&sc, + resolve_scope); + } + + sc_list.Append(sc); + if (num_file_indexes == 1) + line_idx = line_table->FindLineEntryIndexByFileIndex( + line_idx + 1, file_indexes.front(), found_line, true, &line_entry); + else + line_idx = line_table->FindLineEntryIndexByFileIndex( + line_idx + 1, file_indexes, found_line, true, &line_entry); } - return sc_list.GetSize() - prev_size; } bool CompileUnit::GetIsOptimized() { From a54ef8af89c78f7296bea6ffabb7728ef563bec1 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Thu, 28 Nov 2019 13:41:18 +0100 Subject: [PATCH 173/591] [lldb][NFC] Use llvm::StringRef instead of C-strings as multimap key --- lldb/source/Symbol/Symtab.cpp | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/lldb/source/Symbol/Symtab.cpp b/lldb/source/Symbol/Symtab.cpp index 9a2b5cddd73b7..c7a6bf2145267 100644 --- a/lldb/source/Symbol/Symtab.cpp +++ b/lldb/source/Symbol/Symtab.cpp @@ -13,7 +13,6 @@ 
#include "lldb/Core/Module.h" #include "lldb/Core/RichManglingContext.h" -#include "lldb/Core/STLUtils.h" #include "lldb/Core/Section.h" #include "lldb/Symbol/ObjectFile.h" #include "lldb/Symbol/Symbol.h" @@ -107,10 +106,8 @@ void Symtab::Dump(Stream *s, Target *target, SortOrder sort_order, // sorted by name. So we must make the ordered symbol list up ourselves. s->PutCString(" (sorted by name):\n"); DumpSymbolHeader(s); - typedef std::multimap - CStringToSymbol; - CStringToSymbol name_map; + + std::multimap name_map; for (const_iterator pos = m_symbols.begin(), end = m_symbols.end(); pos != end; ++pos) { const char *name = pos->GetName().AsCString(); @@ -118,12 +115,10 @@ void Symtab::Dump(Stream *s, Target *target, SortOrder sort_order, name_map.insert(std::make_pair(name, &(*pos))); } - for (CStringToSymbol::const_iterator pos = name_map.begin(), - end = name_map.end(); - pos != end; ++pos) { + for (const auto &name_to_symbol : name_map) { + const Symbol *symbol = name_to_symbol.second; s->Indent(); - pos->second->Dump(s, target, pos->second - &m_symbols[0], - name_preference); + symbol->Dump(s, target, symbol - &m_symbols[0], name_preference); } } break; From 42c857aa4783824183d55e8a6ede488d69349806 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Thu, 28 Nov 2019 14:07:44 +0100 Subject: [PATCH 174/591] [lldb][NFC] Remove unused STLUtil include and STLUtil.h header --- lldb/include/lldb/Core/STLUtils.h | 26 ------------------- .../lldb/Interpreter/CommandReturnObject.h | 1 - 2 files changed, 27 deletions(-) delete mode 100644 lldb/include/lldb/Core/STLUtils.h diff --git a/lldb/include/lldb/Core/STLUtils.h b/lldb/include/lldb/Core/STLUtils.h deleted file mode 100644 index f9500aa5594ed..0000000000000 --- a/lldb/include/lldb/Core/STLUtils.h +++ /dev/null @@ -1,26 +0,0 @@ -//===-- STLUtils.h ----------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef liblldb_STLUtils_h_ -#define liblldb_STLUtils_h_ - -#include - -#include -#include -#include - - -// C string less than compare function object -struct CStringCompareFunctionObject { - bool operator()(const char *s1, const char *s2) const { - return strcmp(s1, s2) < 0; - } -}; - -#endif // liblldb_STLUtils_h_ diff --git a/lldb/include/lldb/Interpreter/CommandReturnObject.h b/lldb/include/lldb/Interpreter/CommandReturnObject.h index 61e57fb798a1d..8af76e07e5ae1 100644 --- a/lldb/include/lldb/Interpreter/CommandReturnObject.h +++ b/lldb/include/lldb/Interpreter/CommandReturnObject.h @@ -9,7 +9,6 @@ #ifndef liblldb_CommandReturnObject_h_ #define liblldb_CommandReturnObject_h_ -#include "lldb/Core/STLUtils.h" #include "lldb/Core/StreamFile.h" #include "lldb/Utility/StreamString.h" #include "lldb/Utility/StreamTee.h" From 66237889a79f728fffc96394740b975774de26bf Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 28 Nov 2019 14:21:33 +0100 Subject: [PATCH 175/591] [include-fixer] Python 3 support for clang-include-fixer.py Patch by Yannick Brehon! --- .../tool/clang-include-fixer.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/clang-tools-extra/clang-include-fixer/tool/clang-include-fixer.py b/clang-tools-extra/clang-include-fixer/tool/clang-include-fixer.py index df05101e4fd8c..fcdd5a0b60ee4 100644 --- a/clang-tools-extra/clang-include-fixer/tool/clang-include-fixer.py +++ b/clang-tools-extra/clang-include-fixer/tool/clang-include-fixer.py @@ -17,6 +17,7 @@ # It operates on the current, potentially unsaved buffer and does not create # or save any files. To revert a fix, just undo. 
+from __future__ import print_function import argparse import difflib import json @@ -79,7 +80,7 @@ def GetUserSelection(message, headers, maximum_suggested_headers): except Exception: # Show a new prompt on invalid option instead of aborting so that users # don't need to wait for another clang-include-fixer run. - print >> sys.stderr, "Invalid option:", res + print("Invalid option: {}".format(res), file=sys.stderr) return GetUserSelection(message, headers, maximum_suggested_headers) return headers[idx - 1] @@ -95,7 +96,7 @@ def execute(command, text): p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, startupinfo=startupinfo) - return p.communicate(input=text) + return p.communicate(input=text.encode('utf-8')) def InsertHeaderToVimBuffer(header, text): @@ -159,7 +160,7 @@ def main(): if query_mode: symbol = get_symbol_under_cursor() if len(symbol) == 0: - print "Skip querying empty symbol." + print("Skip querying empty symbol.") return command = [binary, "-stdin", "-query-symbol="+get_symbol_under_cursor(), "-db=" + args.db, "-input=" + args.input, @@ -170,13 +171,14 @@ def main(): "-input=" + args.input, vim.current.buffer.name] stdout, stderr = execute(command, text) if stderr: - print >> sys.stderr, "Error while running clang-include-fixer: " + stderr + print("Error while running clang-include-fixer: {}".format(stderr), + file=sys.stderr) return include_fixer_context = json.loads(stdout) query_symbol_infos = include_fixer_context["QuerySymbolInfos"] if not query_symbol_infos: - print "The file is fine, no need to add a header." + print("The file is fine, no need to add a header.") return symbol = query_symbol_infos[0]["RawIdentifier"] # The header_infos is already sorted by clang-include-fixer. 
@@ -192,7 +194,7 @@ def main(): unique_headers.append(header) if not unique_headers: - print "Couldn't find a header for {0}.".format(symbol) + print("Couldn't find a header for {0}.".format(symbol)) return try: @@ -207,9 +209,9 @@ def main(): include_fixer_context["HeaderInfos"] = inserted_header_infos InsertHeaderToVimBuffer(include_fixer_context, text) - print "Added #include {0} for {1}.".format(selected, symbol) + print("Added #include {0} for {1}.".format(selected, symbol)) except Exception as error: - print >> sys.stderr, error.message + print(error.message, file=sys.stderr) return From 50e2ffa18da4247e4d45f421c3271b58b936c869 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Thu, 28 Nov 2019 14:25:46 +0100 Subject: [PATCH 176/591] Revert "[lldb] NFC: refactor CompileUnit::ResolveSymbolContext" This reverts commit 373e2a4f69d623e59329ff801f261d8b299e12d2. This broke breakpoint setting. --- lldb/include/lldb/Symbol/CompileUnit.h | 11 +- lldb/source/API/SBThread.cpp | 7 +- lldb/source/Core/AddressResolverFileLine.cpp | 7 +- lldb/source/Symbol/CompileUnit.cpp | 136 +++++++++++-------- 4 files changed, 90 insertions(+), 71 deletions(-) diff --git a/lldb/include/lldb/Symbol/CompileUnit.h b/lldb/include/lldb/Symbol/CompileUnit.h index b5f37f6789007..7efbf792b1a92 100644 --- a/lldb/include/lldb/Symbol/CompileUnit.h +++ b/lldb/include/lldb/Symbol/CompileUnit.h @@ -381,11 +381,14 @@ class CompileUnit : public std::enable_shared_from_this, /// A SymbolContext list class that will get any matching /// entries appended to. /// + /// \return + /// The number of new matches that were added to \a sc_list. 
+ /// /// \see enum SymbolContext::Scope - void ResolveSymbolContext(const FileSpec &file_spec, uint32_t line, - bool check_inlines, bool exact, - lldb::SymbolContextItem resolve_scope, - SymbolContextList &sc_list); + uint32_t ResolveSymbolContext(const FileSpec &file_spec, uint32_t line, + bool check_inlines, bool exact, + lldb::SymbolContextItem resolve_scope, + SymbolContextList &sc_list); /// Get whether compiler optimizations were enabled for this compile unit /// diff --git a/lldb/source/API/SBThread.cpp b/lldb/source/API/SBThread.cpp index 2dada9a6118db..8d4930bf6edb0 100644 --- a/lldb/source/API/SBThread.cpp +++ b/lldb/source/API/SBThread.cpp @@ -914,10 +914,9 @@ SBError SBThread::StepOverUntil(lldb::SBFrame &sb_frame, const bool exact = false; SymbolContextList sc_list; - frame_sc.comp_unit->ResolveSymbolContext(step_file_spec, line, - check_inlines, exact, - eSymbolContextLineEntry, sc_list); - const uint32_t num_matches = sc_list.GetSize(); + const uint32_t num_matches = frame_sc.comp_unit->ResolveSymbolContext( + step_file_spec, line, check_inlines, exact, eSymbolContextLineEntry, + sc_list); if (num_matches > 0) { SymbolContext sc; for (uint32_t i = 0; i < num_matches; ++i) { diff --git a/lldb/source/Core/AddressResolverFileLine.cpp b/lldb/source/Core/AddressResolverFileLine.cpp index 4122b5d3b747d..4a14260c6c72f 100644 --- a/lldb/source/Core/AddressResolverFileLine.cpp +++ b/lldb/source/Core/AddressResolverFileLine.cpp @@ -40,13 +40,14 @@ Searcher::CallbackReturn AddressResolverFileLine::SearchCallback(SearchFilter &filter, SymbolContext &context, Address *addr) { SymbolContextList sc_list; + uint32_t sc_list_size; CompileUnit *cu = context.comp_unit; Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_BREAKPOINTS)); - cu->ResolveSymbolContext(m_file_spec, m_line_number, m_inlines, false, - eSymbolContextEverything, sc_list); - uint32_t sc_list_size = sc_list.GetSize(); + sc_list_size = + cu->ResolveSymbolContext(m_file_spec, m_line_number, 
m_inlines, false, + eSymbolContextEverything, sc_list); for (uint32_t i = 0; i < sc_list_size; i++) { SymbolContext sc; if (sc_list.GetContextAtIndex(i, sc)) { diff --git a/lldb/source/Symbol/CompileUnit.cpp b/lldb/source/Symbol/CompileUnit.cpp index 62a1d690da42f..b37636c3bafc1 100644 --- a/lldb/source/Symbol/CompileUnit.cpp +++ b/lldb/source/Symbol/CompileUnit.cpp @@ -244,11 +244,11 @@ uint32_t CompileUnit::FindLineEntry(uint32_t start_idx, uint32_t line, return UINT32_MAX; } -void CompileUnit::ResolveSymbolContext(const FileSpec &file_spec, - uint32_t line, bool check_inlines, - bool exact, - SymbolContextItem resolve_scope, - SymbolContextList &sc_list) { +uint32_t CompileUnit::ResolveSymbolContext(const FileSpec &file_spec, + uint32_t line, bool check_inlines, + bool exact, + SymbolContextItem resolve_scope, + SymbolContextList &sc_list) { // First find all of the file indexes that match our "file_spec". If // "file_spec" has an empty directory, then only compare the basenames when // finding file indexes @@ -260,7 +260,7 @@ void CompileUnit::ResolveSymbolContext(const FileSpec &file_spec, // If we are not looking for inlined functions and our file spec doesn't // match then we are done... 
if (!file_spec_matches_cu_file_spec && !check_inlines) - return; + return 0; uint32_t file_idx = GetSupportFiles().FindFileIndex(1, file_spec, true); @@ -271,68 +271,84 @@ void CompileUnit::ResolveSymbolContext(const FileSpec &file_spec, const size_t num_file_indexes = file_indexes.size(); if (num_file_indexes == 0) - return; + return 0; + + const uint32_t prev_size = sc_list.GetSize(); SymbolContext sc(GetModule()); sc.comp_unit = this; - if (line == 0) - return; - - if (file_spec_matches_cu_file_spec && !check_inlines) { + if (line != 0) { + LineTable *line_table = sc.comp_unit->GetLineTable(); + + if (line_table != nullptr) { + uint32_t found_line; + uint32_t line_idx; + + if (num_file_indexes == 1) { + // We only have a single support file that matches, so use the line + // table function that searches for a line entries that match a single + // support file index + LineEntry line_entry; + line_idx = line_table->FindLineEntryIndexByFileIndex( + 0, file_indexes.front(), line, exact, &line_entry); + + // If "exact == true", then "found_line" will be the same as "line". If + // "exact == false", the "found_line" will be the closest line entry + // with a line number greater than "line" and we will use this for our + // subsequent line exact matches below. + found_line = line_entry.line; + + while (line_idx != UINT32_MAX) { + // If they only asked for the line entry, then we're done, we can + // just copy that over. But if they wanted more than just the line + // number, fill it in. 
+ if (resolve_scope == eSymbolContextLineEntry) { + sc.line_entry = line_entry; + } else { + line_entry.range.GetBaseAddress().CalculateSymbolContext( + &sc, resolve_scope); + } + + sc_list.Append(sc); + line_idx = line_table->FindLineEntryIndexByFileIndex( + line_idx + 1, file_indexes.front(), found_line, true, + &line_entry); + } + } else { + // We found multiple support files that match "file_spec" so use the + // line table function that searches for a line entries that match a + // multiple support file indexes. + LineEntry line_entry; + line_idx = line_table->FindLineEntryIndexByFileIndex( + 0, file_indexes, line, exact, &line_entry); + + // If "exact == true", then "found_line" will be the same as "line". If + // "exact == false", the "found_line" will be the closest line entry + // with a line number greater than "line" and we will use this for our + // subsequent line exact matches below. + found_line = line_entry.line; + + while (line_idx != UINT32_MAX) { + if (resolve_scope == eSymbolContextLineEntry) { + sc.line_entry = line_entry; + } else { + line_entry.range.GetBaseAddress().CalculateSymbolContext( + &sc, resolve_scope); + } + + sc_list.Append(sc); + line_idx = line_table->FindLineEntryIndexByFileIndex( + line_idx + 1, file_indexes, found_line, true, &line_entry); + } + } + } + } else if (file_spec_matches_cu_file_spec && !check_inlines) { // only append the context if we aren't looking for inline call sites by // file and line and if the file spec matches that of the compile unit sc_list.Append(sc); - return; - } - - LineTable *line_table = sc.comp_unit->GetLineTable(); - - if (line_table == nullptr) - return; - - uint32_t line_idx; - LineEntry line_entry; - - if (num_file_indexes == 1) { - // We only have a single support file that matches, so use the line - // table function that searches for a line entries that match a single - // support file index - line_idx = line_table->FindLineEntryIndexByFileIndex( - 0, file_indexes.front(), line, exact, 
&line_entry); - } else { - // We found multiple support files that match "file_spec" so use the - // line table function that searches for a line entries that match a - // multiple support file indexes. - line_idx = line_table->FindLineEntryIndexByFileIndex(0, file_indexes, line, - exact, &line_entry); - } - - // If "exact == true", then "found_line" will be the same as "line". If - // "exact == false", the "found_line" will be the closest line entry - // with a line number greater than "line" and we will use this for our - // subsequent line exact matches below. - uint32_t found_line = line_entry.line; - - while (line_idx != UINT32_MAX) { - // If they only asked for the line entry, then we're done, we can - // just copy that over. But if they wanted more than just the line - // number, fill it in. - if (resolve_scope == eSymbolContextLineEntry) { - sc.line_entry = line_entry; - } else { - line_entry.range.GetBaseAddress().CalculateSymbolContext(&sc, - resolve_scope); - } - - sc_list.Append(sc); - if (num_file_indexes == 1) - line_idx = line_table->FindLineEntryIndexByFileIndex( - line_idx + 1, file_indexes.front(), found_line, true, &line_entry); - else - line_idx = line_table->FindLineEntryIndexByFileIndex( - line_idx + 1, file_indexes, found_line, true, &line_entry); } + return sc_list.GetSize() - prev_size; } bool CompileUnit::GetIsOptimized() { From d1a561d446809cc3b5c11c163b9aa5ba4957af68 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Thu, 28 Nov 2019 13:30:39 +0100 Subject: [PATCH 177/591] [lldb] Simplify and improve FileSpecTest Summary: A most of these tests create FileSpecs with a hardcoded style. Add utility functions which create a file spec of a given style to simplify things. While in there add SCOPED_TRACE messages to tests which loop over multiple inputs to ensure it's clear which of the inputs failed. 
Reviewers: teemperor Subscribers: lldb-commits Tags: #lldb Differential Revision: https://reviews.llvm.org/D70814 --- lldb/unittests/Utility/FileSpecTest.cpp | 48 ++++++++++++------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/lldb/unittests/Utility/FileSpecTest.cpp b/lldb/unittests/Utility/FileSpecTest.cpp index 0f5b1652d2989..4f91d11fdf1e2 100644 --- a/lldb/unittests/Utility/FileSpecTest.cpp +++ b/lldb/unittests/Utility/FileSpecTest.cpp @@ -12,6 +12,14 @@ using namespace lldb_private; +static FileSpec PosixSpec(llvm::StringRef path) { + return FileSpec(path, FileSpec::Style::posix); +} + +static FileSpec WindowsSpec(llvm::StringRef path) { + return FileSpec(path, FileSpec::Style::windows); +} + TEST(FileSpecTest, FileAndDirectoryComponents) { FileSpec fs_posix("/foo/bar", FileSpec::Style::posix); EXPECT_STREQ("/foo/bar", fs_posix.GetCString()); @@ -106,8 +114,7 @@ TEST(FileSpecTest, AppendPathComponent) { } TEST(FileSpecTest, CopyByAppendingPathComponent) { - FileSpec fs = FileSpec("/foo", FileSpec::Style::posix) - .CopyByAppendingPathComponent("bar"); + FileSpec fs = PosixSpec("/foo").CopyByAppendingPathComponent("bar"); EXPECT_STREQ("/foo/bar", fs.GetCString()); EXPECT_STREQ("/foo", fs.GetDirectory().GetCString()); EXPECT_STREQ("bar", fs.GetFilename().GetCString()); @@ -136,9 +143,7 @@ TEST(FileSpecTest, PrependPathComponent) { } TEST(FileSpecTest, EqualSeparator) { - FileSpec backward("C:\\foo\\bar", FileSpec::Style::windows); - FileSpec forward("C:/foo/bar", FileSpec::Style::windows); - EXPECT_EQ(forward, backward); + EXPECT_EQ(WindowsSpec("C:\\foo\\bar"), WindowsSpec("C:/foo/bar")); } TEST(FileSpecTest, EqualDotsWindows) { @@ -153,9 +158,8 @@ TEST(FileSpecTest, EqualDotsWindows) { }; for (const auto &test : tests) { - FileSpec one(test.first, FileSpec::Style::windows); - FileSpec two(test.second, FileSpec::Style::windows); - EXPECT_EQ(one, two); + SCOPED_TRACE(llvm::Twine(test.first) + " <=> " + test.second); + 
EXPECT_EQ(WindowsSpec(test.first), WindowsSpec(test.second)); } } @@ -169,9 +173,8 @@ TEST(FileSpecTest, EqualDotsPosix) { }; for (const auto &test : tests) { - FileSpec one(test.first, FileSpec::Style::posix); - FileSpec two(test.second, FileSpec::Style::posix); - EXPECT_EQ(one, two); + SCOPED_TRACE(llvm::Twine(test.first) + " <=> " + test.second); + EXPECT_EQ(PosixSpec(test.first), PosixSpec(test.second)); } } @@ -183,9 +186,8 @@ TEST(FileSpecTest, EqualDotsPosixRoot) { }; for (const auto &test : tests) { - FileSpec one(test.first, FileSpec::Style::posix); - FileSpec two(test.second, FileSpec::Style::posix); - EXPECT_EQ(one, two); + SCOPED_TRACE(llvm::Twine(test.first) + " <=> " + test.second); + EXPECT_EQ(PosixSpec(test.first), PosixSpec(test.second)); } } @@ -200,7 +202,7 @@ TEST(FileSpecTest, GuessPathStyle) { EXPECT_EQ(llvm::None, FileSpec::GuessPathStyle("foo/bar.txt")); } -TEST(FileSpecTest, GetNormalizedPath) { +TEST(FileSpecTest, GetPath) { std::pair posix_tests[] = { {"/foo/.././bar", "/bar"}, {"/foo/./../bar", "/bar"}, @@ -230,8 +232,7 @@ TEST(FileSpecTest, GetNormalizedPath) { }; for (auto test : posix_tests) { SCOPED_TRACE(llvm::Twine("test.first = ") + test.first); - EXPECT_EQ(test.second, - FileSpec(test.first, FileSpec::Style::posix).GetPath()); + EXPECT_EQ(test.second, PosixSpec(test.first).GetPath()); } std::pair windows_tests[] = { @@ -262,9 +263,8 @@ TEST(FileSpecTest, GetNormalizedPath) { {R"(..\..\foo)", R"(..\..\foo)"}, }; for (auto test : windows_tests) { - EXPECT_EQ(test.second, - FileSpec(test.first, FileSpec::Style::windows).GetPath()) - << "Original path: " << test.first; + SCOPED_TRACE(llvm::Twine("test.first = ") + test.first); + EXPECT_EQ(test.second, WindowsSpec(test.first).GetPath()); } } @@ -315,8 +315,8 @@ TEST(FileSpecTest, IsRelative) { "/foo/../.", }; for (const auto &path: not_relative) { - FileSpec spec(path, FileSpec::Style::posix); - EXPECT_FALSE(spec.IsRelative()); + SCOPED_TRACE(path); + 
EXPECT_FALSE(PosixSpec(path).IsRelative()); } llvm::StringRef is_relative[] = { ".", @@ -333,8 +333,8 @@ TEST(FileSpecTest, IsRelative) { "./foo/bar.c" }; for (const auto &path: is_relative) { - FileSpec spec(path, FileSpec::Style::posix); - EXPECT_TRUE(spec.IsRelative()); + SCOPED_TRACE(path); + EXPECT_TRUE(PosixSpec(path).IsRelative()); } } From bf716eb807409adf6490cb1cf595fb51efbd3fe6 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Thu, 28 Nov 2019 13:47:58 +0100 Subject: [PATCH 178/591] [lldb] Add FileSpec::Equal unit tests this is in preparation of a refactor of this method. --- lldb/unittests/Utility/FileSpecTest.cpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/lldb/unittests/Utility/FileSpecTest.cpp b/lldb/unittests/Utility/FileSpecTest.cpp index 4f91d11fdf1e2..132c7cb94fad9 100644 --- a/lldb/unittests/Utility/FileSpecTest.cpp +++ b/lldb/unittests/Utility/FileSpecTest.cpp @@ -379,3 +379,23 @@ TEST(FileSpecTest, RemoveLastPathComponent) { EXPECT_FALSE(fs_windows.RemoveLastPathComponent()); EXPECT_STREQ("C:", fs_windows.GetCString()); } + +TEST(FileSpecTest, Equal) { + auto Eq = [](const char *a, const char *b, bool full) { + return FileSpec::Equal(PosixSpec(a), PosixSpec(b), full); + }; + EXPECT_TRUE(Eq("/foo/bar", "/foo/bar", true)); + EXPECT_TRUE(Eq("/foo/bar", "/foo/bar", false)); + + EXPECT_FALSE(Eq("/foo/bar", "/foo/baz", true)); + EXPECT_FALSE(Eq("/foo/bar", "/foo/baz", false)); + + EXPECT_FALSE(Eq("/bar/foo", "/baz/foo", true)); + EXPECT_FALSE(Eq("/bar/foo", "/baz/foo", false)); + + EXPECT_FALSE(Eq("/bar/foo", "foo", true)); + EXPECT_TRUE(Eq("/bar/foo", "foo", false)); + + EXPECT_FALSE(Eq("foo", "/bar/foo", true)); + EXPECT_TRUE(Eq("foo", "/bar/foo", false)); +} From b18e190b7ca90c09566382a039887f6eafe63d0d Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Thu, 28 Nov 2019 13:48:05 +0100 Subject: [PATCH 179/591] [lldb] refactor FileSpec::Equal The logic of this function was quite hard to follow. 
Replace it with a much simpler, equivalent, implementation. --- lldb/source/Utility/FileSpec.cpp | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/lldb/source/Utility/FileSpec.cpp b/lldb/source/Utility/FileSpec.cpp index 88966843072b6..7fb6e9db72c8c 100644 --- a/lldb/source/Utility/FileSpec.cpp +++ b/lldb/source/Utility/FileSpec.cpp @@ -302,20 +302,10 @@ int FileSpec::Compare(const FileSpec &a, const FileSpec &b, bool full) { } bool FileSpec::Equal(const FileSpec &a, const FileSpec &b, bool full) { - // case sensitivity of equality test - const bool case_sensitive = a.IsCaseSensitive() || b.IsCaseSensitive(); - - const bool filenames_equal = ConstString::Equals(a.m_filename, - b.m_filename, - case_sensitive); - - if (!filenames_equal) - return false; - - if (!full && (a.GetDirectory().IsEmpty() || b.GetDirectory().IsEmpty())) - return filenames_equal; + if (full || (a.GetDirectory() && b.GetDirectory())) + return a == b; - return a == b; + return a.FileEquals(b); } llvm::Optional FileSpec::GuessPathStyle(llvm::StringRef absolute_path) { From 3cd8ba0e37a035a134dc01ce260040f1d57f4d40 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Thu, 28 Nov 2019 15:05:10 +0100 Subject: [PATCH 180/591] [lldb][NFC] Remove unused CompilerDecl::IsClang --- lldb/include/lldb/Symbol/CompilerDecl.h | 2 -- lldb/source/Symbol/CompilerDecl.cpp | 4 ---- 2 files changed, 6 deletions(-) diff --git a/lldb/include/lldb/Symbol/CompilerDecl.h b/lldb/include/lldb/Symbol/CompilerDecl.h index 4817ec4b22670..7e4755a58c59f 100644 --- a/lldb/include/lldb/Symbol/CompilerDecl.h +++ b/lldb/include/lldb/Symbol/CompilerDecl.h @@ -39,8 +39,6 @@ class CompilerDecl { return m_type_system != nullptr && m_opaque_decl != nullptr; } - bool IsClang() const; - // Accessors TypeSystem *GetTypeSystem() const { return m_type_system; } diff --git a/lldb/source/Symbol/CompilerDecl.cpp b/lldb/source/Symbol/CompilerDecl.cpp index 3d17d802dd044..017e541bd203c 100644 --- 
a/lldb/source/Symbol/CompilerDecl.cpp +++ b/lldb/source/Symbol/CompilerDecl.cpp @@ -13,10 +13,6 @@ using namespace lldb_private; -bool CompilerDecl::IsClang() const { - return IsValid() && llvm::isa(m_type_system); -} - ConstString CompilerDecl::GetName() const { return m_type_system->DeclGetName(m_opaque_decl); } From e0203b25af92a3388580d6ef4eb386058720449e Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Thu, 28 Nov 2019 15:14:24 +0100 Subject: [PATCH 181/591] [lldb][NFC] Simplify CompilerDecl and CompilerDeclContext initialization --- lldb/include/lldb/Symbol/CompilerDecl.h | 8 +++----- lldb/include/lldb/Symbol/CompilerDeclContext.h | 8 +++----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/lldb/include/lldb/Symbol/CompilerDecl.h b/lldb/include/lldb/Symbol/CompilerDecl.h index 7e4755a58c59f..e4687ffb38536 100644 --- a/lldb/include/lldb/Symbol/CompilerDecl.h +++ b/lldb/include/lldb/Symbol/CompilerDecl.h @@ -18,13 +18,11 @@ namespace lldb_private { class CompilerDecl { public: // Constructors and Destructors - CompilerDecl() : m_type_system(nullptr), m_opaque_decl(nullptr) {} + CompilerDecl() = default; CompilerDecl(TypeSystem *type_system, void *decl) : m_type_system(type_system), m_opaque_decl(decl) {} - ~CompilerDecl() {} - // Tests explicit operator bool() const { return IsValid(); } @@ -73,8 +71,8 @@ class CompilerDecl { CompilerType GetFunctionArgumentType(size_t arg_idx) const; private: - TypeSystem *m_type_system; - void *m_opaque_decl; + TypeSystem *m_type_system = nullptr; + void *m_opaque_decl = nullptr; }; bool operator==(const CompilerDecl &lhs, const CompilerDecl &rhs); diff --git a/lldb/include/lldb/Symbol/CompilerDeclContext.h b/lldb/include/lldb/Symbol/CompilerDeclContext.h index e7958c08d8334..c140a3df13d02 100644 --- a/lldb/include/lldb/Symbol/CompilerDeclContext.h +++ b/lldb/include/lldb/Symbol/CompilerDeclContext.h @@ -19,13 +19,11 @@ namespace lldb_private { class CompilerDeclContext { public: // Constructors and 
Destructors - CompilerDeclContext() : m_type_system(nullptr), m_opaque_decl_ctx(nullptr) {} + CompilerDeclContext() = default; CompilerDeclContext(TypeSystem *type_system, void *decl_ctx) : m_type_system(type_system), m_opaque_decl_ctx(decl_ctx) {} - ~CompilerDeclContext() {} - // Tests explicit operator bool() const { return IsValid(); } @@ -105,8 +103,8 @@ class CompilerDeclContext { bool IsStructUnionOrClass() const; private: - TypeSystem *m_type_system; - void *m_opaque_decl_ctx; + TypeSystem *m_type_system = nullptr; + void *m_opaque_decl_ctx = nullptr; }; bool operator==(const CompilerDeclContext &lhs, const CompilerDeclContext &rhs); From f39277c1d370ccbbec2e20a20375ee6fb7281ae4 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Thu, 28 Nov 2019 15:29:09 +0100 Subject: [PATCH 182/591] [lldb][NFC] Remove unused variable in ClangASTSource::CompleteType Now that CompilerDeclContext is a trivial class, Clang started warning that this unused variable is in fact unused. Let's remove it. --- lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp index 7440f6a0c3636..2b484db3a188e 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp @@ -363,7 +363,6 @@ void ClangASTSource::CompleteType(TagDecl *tag_decl) { TypeList types; ConstString name(tag_decl->getName().str().c_str()); - CompilerDeclContext namespace_decl; const ModuleList &module_list = m_target->GetImages(); From c2dd84e396d091ca61b06b59c622b444ffc17234 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Thu, 28 Nov 2019 15:43:26 +0100 Subject: [PATCH 183/591] [lldb][NFC] Remove CompilerDeclContext::IsClang This method is only used in ClangASTContext. 
Also removes the includes we only needed for the ClangASTContext RTTI check in the CompilerDecl[Context].cpp files. --- lldb/include/lldb/Symbol/CompilerDeclContext.h | 2 -- lldb/source/Symbol/ClangASTContext.cpp | 14 +++++++++----- lldb/source/Symbol/CompilerDecl.cpp | 1 - lldb/source/Symbol/CompilerDeclContext.cpp | 5 ----- 4 files changed, 9 insertions(+), 13 deletions(-) diff --git a/lldb/include/lldb/Symbol/CompilerDeclContext.h b/lldb/include/lldb/Symbol/CompilerDeclContext.h index c140a3df13d02..fe8539ab30e68 100644 --- a/lldb/include/lldb/Symbol/CompilerDeclContext.h +++ b/lldb/include/lldb/Symbol/CompilerDeclContext.h @@ -38,8 +38,6 @@ class CompilerDeclContext { return m_type_system != nullptr && m_opaque_decl_ctx != nullptr; } - bool IsClang() const; - std::vector FindDeclByName(ConstString name, const bool ignore_using_decls); diff --git a/lldb/source/Symbol/ClangASTContext.cpp b/lldb/source/Symbol/ClangASTContext.cpp index e413029f03005..e70b005550d10 100644 --- a/lldb/source/Symbol/ClangASTContext.cpp +++ b/lldb/source/Symbol/ClangASTContext.cpp @@ -10199,16 +10199,20 @@ bool ClangASTContext::DeclContextIsContainedInLookup( return false; } +static bool IsClangDeclContext(const CompilerDeclContext &dc) { + return dc.IsValid() && isa(dc.GetTypeSystem()); +} + clang::DeclContext * ClangASTContext::DeclContextGetAsDeclContext(const CompilerDeclContext &dc) { - if (dc.IsClang()) + if (IsClangDeclContext(dc)) return (clang::DeclContext *)dc.GetOpaqueDeclContext(); return nullptr; } ObjCMethodDecl * ClangASTContext::DeclContextGetAsObjCMethodDecl(const CompilerDeclContext &dc) { - if (dc.IsClang()) + if (IsClangDeclContext(dc)) return llvm::dyn_cast( (clang::DeclContext *)dc.GetOpaqueDeclContext()); return nullptr; @@ -10216,7 +10220,7 @@ ClangASTContext::DeclContextGetAsObjCMethodDecl(const CompilerDeclContext &dc) { CXXMethodDecl * ClangASTContext::DeclContextGetAsCXXMethodDecl(const CompilerDeclContext &dc) { - if (dc.IsClang()) + if 
(IsClangDeclContext(dc)) return llvm::dyn_cast( (clang::DeclContext *)dc.GetOpaqueDeclContext()); return nullptr; @@ -10224,7 +10228,7 @@ ClangASTContext::DeclContextGetAsCXXMethodDecl(const CompilerDeclContext &dc) { clang::FunctionDecl * ClangASTContext::DeclContextGetAsFunctionDecl(const CompilerDeclContext &dc) { - if (dc.IsClang()) + if (IsClangDeclContext(dc)) return llvm::dyn_cast( (clang::DeclContext *)dc.GetOpaqueDeclContext()); return nullptr; @@ -10232,7 +10236,7 @@ ClangASTContext::DeclContextGetAsFunctionDecl(const CompilerDeclContext &dc) { clang::NamespaceDecl * ClangASTContext::DeclContextGetAsNamespaceDecl(const CompilerDeclContext &dc) { - if (dc.IsClang()) + if (IsClangDeclContext(dc)) return llvm::dyn_cast( (clang::DeclContext *)dc.GetOpaqueDeclContext()); return nullptr; diff --git a/lldb/source/Symbol/CompilerDecl.cpp b/lldb/source/Symbol/CompilerDecl.cpp index 017e541bd203c..48d9169c1a7a2 100644 --- a/lldb/source/Symbol/CompilerDecl.cpp +++ b/lldb/source/Symbol/CompilerDecl.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "lldb/Symbol/CompilerDecl.h" -#include "lldb/Symbol/ClangASTContext.h" #include "lldb/Symbol/CompilerDeclContext.h" #include "lldb/Symbol/TypeSystem.h" diff --git a/lldb/source/Symbol/CompilerDeclContext.cpp b/lldb/source/Symbol/CompilerDeclContext.cpp index 7d45f47ad133c..672de6ec34d1e 100644 --- a/lldb/source/Symbol/CompilerDeclContext.cpp +++ b/lldb/source/Symbol/CompilerDeclContext.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "lldb/Symbol/CompilerDeclContext.h" -#include "lldb/Symbol/ClangASTContext.h" #include "lldb/Symbol/CompilerDecl.h" #include "lldb/Symbol/TypeSystem.h" #include @@ -24,10 +23,6 @@ CompilerDeclContext::FindDeclByName(ConstString name, return std::vector(); } -bool CompilerDeclContext::IsClang() const { - return IsValid() && llvm::isa(m_type_system); -} - ConstString 
CompilerDeclContext::GetName() const { if (IsValid()) return m_type_system->DeclContextGetName(m_opaque_decl_ctx); From ed864745c97ece86e29957cb94b5a3e8dee86859 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Milo=C5=A1=20Stojanovi=C4=87?= Date: Thu, 28 Nov 2019 16:18:28 +0100 Subject: [PATCH 184/591] [OpenMP][test] Fix test on MIPS-based buildbots On MIPS `zeroext` or `signext` can appear in the output. Differential Revision: https://reviews.llvm.org/D70820 --- clang/test/OpenMP/parallel_codegen.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/OpenMP/parallel_codegen.cpp b/clang/test/OpenMP/parallel_codegen.cpp index bacb2c6b06eef..498a0590b51de 100644 --- a/clang/test/OpenMP/parallel_codegen.cpp +++ b/clang/test/OpenMP/parallel_codegen.cpp @@ -109,7 +109,7 @@ int main (int argc, char **argv) { // CHECK-DEBUG-NEXT: ret i32 0 // CHECK-DEBUG-NEXT: } -// CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc, i{{64|32}} %{{.+}}) +// CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable({{4|8}}) %argc, i{{64|32}}{{.*}} %{{.+}}) // CHECK: store i8*** %argc, i8**** [[ARGC_PTR_ADDR:%.+]], // CHECK: [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_ADDR]] // CHECK: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]] From acd7fe8636ab1d892a935ca747ed9bb6420e2253 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Thu, 28 Nov 2019 15:31:41 +0000 Subject: [PATCH 185/591] [AArch64][v8.3a] Don't emit LDRA '[xN]!' alias in disassembly. Summary: In rG643ac6c0420b, the syntax `ldraa x1, [x0]!` was added as an alias for `ldraa x1, [x0, #0]!`. That syntax is less obvious in meaning, and also will not be accepted by assemblers that haven't been updated yet. So it would be better not to emit it as the preferred disassembly for that instruction. 
This change lowers the EmitPriority of the new alias so that the more explicit syntax `[x0, #0]!` is preferred by the disassembler. The new syntax is still accepted by the assembler. Reviewers: ab, ostannard Reviewed By: ostannard Subscribers: kristof.beyls, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70813 --- llvm/lib/Target/AArch64/AArch64InstrFormats.td | 2 +- llvm/test/MC/AArch64/armv8.3a-signed-pointer.s | 4 ++-- llvm/test/MC/Disassembler/AArch64/armv8.3a-signed-pointer.txt | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 5da790c846179..878cb79eb3267 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -1473,7 +1473,7 @@ multiclass AuthLoad { (!cast(NAME # "indexed") GPR64:$Rt, GPR64sp:$Rn, 0)>; def : InstAlias(NAME # "writeback") GPR64sp:$wback, GPR64:$Rt, 0)>; + (!cast(NAME # "writeback") GPR64sp:$wback, GPR64:$Rt, 0), 0>; } //--- diff --git a/llvm/test/MC/AArch64/armv8.3a-signed-pointer.s b/llvm/test/MC/AArch64/armv8.3a-signed-pointer.s index 2ca15fceccc8f..056a3ae86c07f 100644 --- a/llvm/test/MC/AArch64/armv8.3a-signed-pointer.s +++ b/llvm/test/MC/AArch64/armv8.3a-signed-pointer.s @@ -307,10 +307,10 @@ // CHECK-REQ: error: instruction requires: pa // CHECK-REQ-NEXT: ldrab x0, [x1] ldraa x0, [x1]! -// CHECK-NEXT: ldraa x0, [x1]! // encoding: [0x20,0x0c,0x20,0xf8] +// CHECK-NEXT: ldraa x0, [x1, #0]! // encoding: [0x20,0x0c,0x20,0xf8] // CHECK-REQ: error: instruction requires: pa // CHECK-REQ-NEXT: ldraa x0, [x1]! ldrab x0, [x1]! -// CHECK-NEXT: ldrab x0, [x1]! // encoding: [0x20,0x0c,0xa0,0xf8] +// CHECK-NEXT: ldrab x0, [x1, #0]! // encoding: [0x20,0x0c,0xa0,0xf8] // CHECK-REQ: error: instruction requires: pa // CHECK-REQ-NEXT: ldrab x0, [x1]! 
diff --git a/llvm/test/MC/Disassembler/AArch64/armv8.3a-signed-pointer.txt b/llvm/test/MC/Disassembler/AArch64/armv8.3a-signed-pointer.txt index d11056044fa48..7215d086c693c 100644 --- a/llvm/test/MC/Disassembler/AArch64/armv8.3a-signed-pointer.txt +++ b/llvm/test/MC/Disassembler/AArch64/armv8.3a-signed-pointer.txt @@ -114,7 +114,7 @@ [0x20,0x04,0x20,0xf8] [0x20,0x04,0xa0,0xf8] -# CHECK: ldraa x0, [x1]! -# CHECK: ldrab x0, [x1]! +# CHECK: ldraa x0, [x1, #0]! +# CHECK: ldrab x0, [x1, #0]! [0x20,0x0c,0x20,0xf8] [0x20,0x0c,0xa0,0xf8] From 256ad954a9e453e55b950207ca433da8da883a33 Mon Sep 17 00:00:00 2001 From: Austin Kerbow Date: Fri, 22 Nov 2019 12:25:13 -0800 Subject: [PATCH 186/591] AMDGPU: Reuse carry out register during FI elimination Summary: Pre gfx9 we need to scavenge a 64-bit SGPR to use as the carry out for an Add. If only one SGPR was available this crashed when trying to scavenge another 32bit SGPR to materialize the offset. Instead, reuse a 32-bit SGPR from the carry out as the offset register. Also prefer to use vcc for the unused carry out when it is available. 
Reviewers: arsenm, rampitec Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70614 --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 6 +- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 14 ++-- .../AMDGPU/pei-scavenge-sgpr-carry-out.mir | 83 +++++++++++++++++++ 3 files changed, 97 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index ed915f03be217..5e39e7c119bc4 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6211,7 +6211,11 @@ MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB, if (ST.hasAddNoCarry()) return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e32), DestReg); - Register UnusedCarry = RS.scavengeRegister(RI.getBoolRC(), I, 0, false); + // If available, prefer to use vcc. + Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC) + ? Register(RI.getVCC()) + : RS.scavengeRegister(RI.getBoolRC(), I, 0, false); + // TODO: Users need to deal with this. if (!UnusedCarry.isValid()) return MachineInstrBuilder(); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index efcc7266316e0..488bd270ac900 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1123,11 +1123,15 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, if (!IsVOP2) MIB.addImm(0); // clamp bit } else { - Register ConstOffsetReg = - RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MIB, 0, false); - - // This should always be able to use the unused carry out. - assert(ConstOffsetReg && "this scavenge should not be able to fail"); + assert(MIB->getOpcode() == AMDGPU::V_ADD_I32_e64 && + "Need to reuse carry out register"); + + // Use scavenged unused carry out as offset register. 
+ Register ConstOffsetReg; + if (!isWave32) + ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0); + else + ConstOffsetReg = MIB.getReg(1); BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg) .addImm(Offset); diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir index dc7a7c804bee1..1c7adc39fe290 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir @@ -91,3 +91,86 @@ body: | $vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr28, implicit $sgpr31 S_ENDPGM 0, implicit $vcc ... + +# When only one 64-bit SGPR is available for the unused carry out pre gfx9, +# we must reuse one of the 32-bit SGPR sub-regs to materialize the offset. 
+ +--- +name: scavenge_sgpr_pei_one_sgpr_64 +tracksRegLiveness: true + +stack: + - { id: 0, type: default, offset: 0, size: 4, alignment: 8192 } + - { id: 1, type: default, offset: 0, size: 4, alignment: 8192 } + +machineFunctionInfo: + isEntryFunction: false + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr34 + frameOffsetReg: $sgpr33 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr1 + + ; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr_64 + ; CHECK: liveins: $vgpr1 + ; CHECK: $sgpr27 = frame-setup COPY $sgpr33 + ; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc + ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc + ; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc + ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc + ; CHECK: $sgpr28 = S_SUB_U32 $sgpr33, $sgpr34, implicit-def $scc + ; CHECK: $vgpr3 = V_LSHRREV_B32_e64 6, killed $sgpr28, implicit $exec + ; CHECK: $sgpr28 = S_MOV_B32 8192 + ; CHECK: $vgpr2, dead $sgpr28_sgpr29 = V_ADD_I32_e64 killed $sgpr28, killed $vgpr3, 0, implicit $exec + ; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, 
implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr31 + ; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc + ; CHECK: $sgpr33 = frame-setup COPY $sgpr27 + ; CHECK: S_ENDPGM 0, implicit $vcc + S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc + $vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr31 + S_ENDPGM 0, implicit $vcc +... + +# Prefer to use vcc as unused carry out. 
+ +--- +name: scavenge_sgpr_pei_prefer_vcc +tracksRegLiveness: true + +stack: + - { id: 0, type: default, offset: 0, size: 4, alignment: 8192 } + - { id: 1, type: default, offset: 0, size: 4, alignment: 8192 } + +machineFunctionInfo: + isEntryFunction: false + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr34 + frameOffsetReg: $sgpr33 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr1 + + ; CHECK-LABEL: name: scavenge_sgpr_pei_prefer_vcc + ; CHECK: liveins: $vgpr1 + ; CHECK: $sgpr27 = frame-setup COPY $sgpr33 + ; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc + ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc + ; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc + ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31 + ; CHECK: $vcc_hi = S_SUB_U32 $sgpr33, $sgpr34, implicit-def $scc + ; CHECK: $vgpr3 = V_LSHRREV_B32_e64 6, killed $vcc_hi, implicit $exec + ; CHECK: $vcc_lo = S_MOV_B32 8192 + ; CHECK: $vgpr2, dead $vcc = V_ADD_I32_e64 killed $vcc_lo, killed $vgpr3, 0, implicit $exec + ; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit 
$sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr31 + ; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc + ; CHECK: $sgpr33 = frame-setup COPY $sgpr27 + ; CHECK: S_ENDPGM 0 + S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31 + $vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr31 + S_ENDPGM 0 +... 
From b4dfc5508f9239f50a3c44dd64e82a488b698b29 Mon Sep 17 00:00:00 2001 From: Alexandre Ganea Date: Thu, 28 Nov 2019 11:16:55 -0500 Subject: [PATCH 187/591] [LLDB] Fix wrong argument in CommandObjectThreadStepWithTypeAndScope Differential Revision: https://reviews.llvm.org/D70448 --- lldb/source/Commands/CommandObjectThread.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Commands/CommandObjectThread.cpp b/lldb/source/Commands/CommandObjectThread.cpp index c93bd9d5c2323..a74eec01933b2 100644 --- a/lldb/source/Commands/CommandObjectThread.cpp +++ b/lldb/source/Commands/CommandObjectThread.cpp @@ -526,7 +526,7 @@ class CommandObjectThreadStepWithTypeAndScope : public CommandObjectParsed { eCommandProcessMustBeLaunched | eCommandProcessMustBePaused), m_step_type(step_type), m_step_scope(step_scope), m_options(), - m_class_options("scripted step", 'C') { + m_class_options("scripted step") { CommandArgumentEntry arg; CommandArgumentData thread_id_arg; From bdad3ec75ab35ade2433b1278689d483dcf9abc4 Mon Sep 17 00:00:00 2001 From: Alexandre Ganea Date: Thu, 28 Nov 2019 14:15:13 -0500 Subject: [PATCH 188/591] [LLDB] On Windows, force error message formatting to English This fixes the Utility/StatusTest.ErrorWin32 unit test on non-English locales. Differential Revision: https://reviews.llvm.org/D70442 --- lldb/source/Utility/Status.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lldb/source/Utility/Status.cpp b/lldb/source/Utility/Status.cpp index 3b5094d64b75a..b74db72773dd4 100644 --- a/lldb/source/Utility/Status.cpp +++ b/lldb/source/Utility/Status.cpp @@ -100,14 +100,23 @@ static std::string RetrieveWin32ErrorString(uint32_t error_code) { char *buffer = nullptr; std::string message; // Retrieve win32 system error. 
+  // First, attempt to load a en-US message if (::FormatMessageA( FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_MAX_WIDTH_MASK, - NULL, error_code, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + NULL, error_code, MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), (LPSTR)&buffer, 0, NULL)) { message.assign(buffer); ::LocalFree(buffer); } + // If the previous didn't work, use the default OS language + else if (::FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_MAX_WIDTH_MASK, + NULL, error_code, 0, (LPSTR)&buffer, 0, NULL)) { + message.assign(buffer); + ::LocalFree(buffer); + } return message; } #endif From c671639af6a96c31d3c0e5487051bef28bad1640 Mon Sep 17 00:00:00 2001 From: Konrad Kleine Date: Thu, 28 Nov 2019 16:54:15 +0100 Subject: [PATCH 189/591] [lldb] NFC: refactor CompileUnit::ResolveSymbolContext Summary: I found the above named method hard to read because it had a) many nested blocks, b) one return statement at the end with some logic involved, c) a duplicated while-loop with just small differences in it. I decided to refactor this function by employing an early exit strategy. In order to capture the logic in the return statement and to not have it repeated more than once I chose to implement a very small lambda function that captures all the variables it needs. I also replaced the two while-loops with just one. This is a non-functional change (NFC). 
Reviewers: jdoerfert, teemperor Reviewed By: teemperor Subscribers: labath, teemperor, lldb-commits Tags: #lldb Differential Revision: https://reviews.llvm.org/D70774 --- lldb/include/lldb/Symbol/CompileUnit.h | 11 +- lldb/source/API/SBThread.cpp | 7 +- lldb/source/Core/AddressResolverFileLine.cpp | 7 +- lldb/source/Symbol/CompileUnit.cpp | 137 ++++++++----------- 4 files changed, 71 insertions(+), 91 deletions(-) diff --git a/lldb/include/lldb/Symbol/CompileUnit.h b/lldb/include/lldb/Symbol/CompileUnit.h index 7efbf792b1a92..b5f37f6789007 100644 --- a/lldb/include/lldb/Symbol/CompileUnit.h +++ b/lldb/include/lldb/Symbol/CompileUnit.h @@ -381,14 +381,11 @@ class CompileUnit : public std::enable_shared_from_this, /// A SymbolContext list class that will get any matching /// entries appended to. /// - /// \return - /// The number of new matches that were added to \a sc_list. - /// /// \see enum SymbolContext::Scope - uint32_t ResolveSymbolContext(const FileSpec &file_spec, uint32_t line, - bool check_inlines, bool exact, - lldb::SymbolContextItem resolve_scope, - SymbolContextList &sc_list); + void ResolveSymbolContext(const FileSpec &file_spec, uint32_t line, + bool check_inlines, bool exact, + lldb::SymbolContextItem resolve_scope, + SymbolContextList &sc_list); /// Get whether compiler optimizations were enabled for this compile unit /// diff --git a/lldb/source/API/SBThread.cpp b/lldb/source/API/SBThread.cpp index 8d4930bf6edb0..2dada9a6118db 100644 --- a/lldb/source/API/SBThread.cpp +++ b/lldb/source/API/SBThread.cpp @@ -914,9 +914,10 @@ SBError SBThread::StepOverUntil(lldb::SBFrame &sb_frame, const bool exact = false; SymbolContextList sc_list; - const uint32_t num_matches = frame_sc.comp_unit->ResolveSymbolContext( - step_file_spec, line, check_inlines, exact, eSymbolContextLineEntry, - sc_list); + frame_sc.comp_unit->ResolveSymbolContext(step_file_spec, line, + check_inlines, exact, + eSymbolContextLineEntry, sc_list); + const uint32_t num_matches = 
sc_list.GetSize(); if (num_matches > 0) { SymbolContext sc; for (uint32_t i = 0; i < num_matches; ++i) { diff --git a/lldb/source/Core/AddressResolverFileLine.cpp b/lldb/source/Core/AddressResolverFileLine.cpp index 4a14260c6c72f..4122b5d3b747d 100644 --- a/lldb/source/Core/AddressResolverFileLine.cpp +++ b/lldb/source/Core/AddressResolverFileLine.cpp @@ -40,14 +40,13 @@ Searcher::CallbackReturn AddressResolverFileLine::SearchCallback(SearchFilter &filter, SymbolContext &context, Address *addr) { SymbolContextList sc_list; - uint32_t sc_list_size; CompileUnit *cu = context.comp_unit; Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_BREAKPOINTS)); - sc_list_size = - cu->ResolveSymbolContext(m_file_spec, m_line_number, m_inlines, false, - eSymbolContextEverything, sc_list); + cu->ResolveSymbolContext(m_file_spec, m_line_number, m_inlines, false, + eSymbolContextEverything, sc_list); + uint32_t sc_list_size = sc_list.GetSize(); for (uint32_t i = 0; i < sc_list_size; i++) { SymbolContext sc; if (sc_list.GetContextAtIndex(i, sc)) { diff --git a/lldb/source/Symbol/CompileUnit.cpp b/lldb/source/Symbol/CompileUnit.cpp index b37636c3bafc1..82074367ec8fb 100644 --- a/lldb/source/Symbol/CompileUnit.cpp +++ b/lldb/source/Symbol/CompileUnit.cpp @@ -244,11 +244,11 @@ uint32_t CompileUnit::FindLineEntry(uint32_t start_idx, uint32_t line, return UINT32_MAX; } -uint32_t CompileUnit::ResolveSymbolContext(const FileSpec &file_spec, - uint32_t line, bool check_inlines, - bool exact, - SymbolContextItem resolve_scope, - SymbolContextList &sc_list) { +void CompileUnit::ResolveSymbolContext(const FileSpec &file_spec, + uint32_t line, bool check_inlines, + bool exact, + SymbolContextItem resolve_scope, + SymbolContextList &sc_list) { // First find all of the file indexes that match our "file_spec". 
If // "file_spec" has an empty directory, then only compare the basenames when // finding file indexes @@ -260,7 +260,7 @@ uint32_t CompileUnit::ResolveSymbolContext(const FileSpec &file_spec, // If we are not looking for inlined functions and our file spec doesn't // match then we are done... if (!file_spec_matches_cu_file_spec && !check_inlines) - return 0; + return; uint32_t file_idx = GetSupportFiles().FindFileIndex(1, file_spec, true); @@ -271,84 +271,67 @@ uint32_t CompileUnit::ResolveSymbolContext(const FileSpec &file_spec, const size_t num_file_indexes = file_indexes.size(); if (num_file_indexes == 0) - return 0; - - const uint32_t prev_size = sc_list.GetSize(); + return; SymbolContext sc(GetModule()); sc.comp_unit = this; - if (line != 0) { - LineTable *line_table = sc.comp_unit->GetLineTable(); - - if (line_table != nullptr) { - uint32_t found_line; - uint32_t line_idx; - - if (num_file_indexes == 1) { - // We only have a single support file that matches, so use the line - // table function that searches for a line entries that match a single - // support file index - LineEntry line_entry; - line_idx = line_table->FindLineEntryIndexByFileIndex( - 0, file_indexes.front(), line, exact, &line_entry); - - // If "exact == true", then "found_line" will be the same as "line". If - // "exact == false", the "found_line" will be the closest line entry - // with a line number greater than "line" and we will use this for our - // subsequent line exact matches below. - found_line = line_entry.line; - - while (line_idx != UINT32_MAX) { - // If they only asked for the line entry, then we're done, we can - // just copy that over. But if they wanted more than just the line - // number, fill it in. 
- if (resolve_scope == eSymbolContextLineEntry) { - sc.line_entry = line_entry; - } else { - line_entry.range.GetBaseAddress().CalculateSymbolContext( - &sc, resolve_scope); - } - - sc_list.Append(sc); - line_idx = line_table->FindLineEntryIndexByFileIndex( - line_idx + 1, file_indexes.front(), found_line, true, - &line_entry); - } - } else { - // We found multiple support files that match "file_spec" so use the - // line table function that searches for a line entries that match a - // multiple support file indexes. - LineEntry line_entry; - line_idx = line_table->FindLineEntryIndexByFileIndex( - 0, file_indexes, line, exact, &line_entry); - - // If "exact == true", then "found_line" will be the same as "line". If - // "exact == false", the "found_line" will be the closest line entry - // with a line number greater than "line" and we will use this for our - // subsequent line exact matches below. - found_line = line_entry.line; - - while (line_idx != UINT32_MAX) { - if (resolve_scope == eSymbolContextLineEntry) { - sc.line_entry = line_entry; - } else { - line_entry.range.GetBaseAddress().CalculateSymbolContext( - &sc, resolve_scope); - } - - sc_list.Append(sc); - line_idx = line_table->FindLineEntryIndexByFileIndex( - line_idx + 1, file_indexes, found_line, true, &line_entry); - } - } + if (line == 0) { + if (file_spec_matches_cu_file_spec && !check_inlines) { + // only append the context if we aren't looking for inline call sites by + // file and line and if the file spec matches that of the compile unit + sc_list.Append(sc); } - } else if (file_spec_matches_cu_file_spec && !check_inlines) { - // only append the context if we aren't looking for inline call sites by - // file and line and if the file spec matches that of the compile unit + return; + } + + LineTable *line_table = sc.comp_unit->GetLineTable(); + + if (line_table == nullptr) + return; + + uint32_t line_idx; + LineEntry line_entry; + + if (num_file_indexes == 1) { + // We only have a single support 
file that matches, so use the line + // table function that searches for a line entries that match a single + // support file index + line_idx = line_table->FindLineEntryIndexByFileIndex( + 0, file_indexes.front(), line, exact, &line_entry); + } else { + // We found multiple support files that match "file_spec" so use the + // line table function that searches for a line entries that match a + // multiple support file indexes. + line_idx = line_table->FindLineEntryIndexByFileIndex(0, file_indexes, line, + exact, &line_entry); + } + + // If "exact == true", then "found_line" will be the same as "line". If + // "exact == false", the "found_line" will be the closest line entry + // with a line number greater than "line" and we will use this for our + // subsequent line exact matches below. + uint32_t found_line = line_entry.line; + + while (line_idx != UINT32_MAX) { + // If they only asked for the line entry, then we're done, we can + // just copy that over. But if they wanted more than just the line + // number, fill it in. + if (resolve_scope == eSymbolContextLineEntry) { + sc.line_entry = line_entry; + } else { + line_entry.range.GetBaseAddress().CalculateSymbolContext(&sc, + resolve_scope); + } + sc_list.Append(sc); + if (num_file_indexes == 1) + line_idx = line_table->FindLineEntryIndexByFileIndex( + line_idx + 1, file_indexes.front(), found_line, true, &line_entry); + else + line_idx = line_table->FindLineEntryIndexByFileIndex( + line_idx + 1, file_indexes, found_line, true, &line_entry); } - return sc_list.GetSize() - prev_size; } bool CompileUnit::GetIsOptimized() { From ec3efcf11ff2fcdb5a754e3bda942dd5bef0928e Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 28 Nov 2019 22:08:05 +0100 Subject: [PATCH 190/591] [IVDescriptors] Skip FOR where we have multiple sink points for now. This fixes a crash with instructions where multiple operands are first-order-recurrences. 
--- llvm/lib/Analysis/IVDescriptors.cpp | 7 +++++ .../first-order-recurrence-complex.ll | 30 +++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index ce99226087fa2..3c33aa973cdd6 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -721,6 +721,13 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence( if (I->getParent()->getTerminator() == I) return false; + // Do not try to sink an instruction multiple times (if multiple operands + // are first order recurrences). + // TODO: We can support this case, by sinking the instruction after the + // 'deepest' previous instruction. + if (SinkAfter.find(I) != SinkAfter.end()) + return false; + if (DT->dominates(Previous, I)) // We already are good w/o sinking. return true; diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll index e09804276ec83..aa913172f7b57 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll @@ -243,3 +243,33 @@ for: exit: ret void } + +; TODO: We should be able to sink %tmp38 after %tmp60. 
+define void @instruction_with_2_FOR_operands() { +; CHECK-LABEL: define void @instruction_with_2_FOR_operands( +; CHECK-NEXT: bb: +; CHECK-NEXT: br label %bb13 + +; CHECK-LABEL: bb13: +; CHECK: br i1 %tmp12, label %bb13, label %bb74 + +; CHECK-LABEL: bb74: +; CHECK-NEXT: ret void +; +bb: + br label %bb13 + +bb13: ; preds = %bb13, %bb + %tmp37 = phi float [ %tmp60, %bb13 ], [ undef, %bb ] + %tmp27 = phi float [ %tmp49, %bb13 ], [ undef, %bb ] + %indvars.iv = phi i64 [ %indvars.iv.next, %bb13 ], [ 0, %bb ] + %tmp38 = fmul fast float %tmp37, %tmp27 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %tmp49 = load float, float* undef, align 4 + %tmp60 = load float, float* undef, align 4 + %tmp12 = icmp slt i64 %indvars.iv, undef + br i1 %tmp12, label %bb13, label %bb74 + +bb74: ; preds = %bb13 + ret void +} From f4d32ae75bf515f443a2c99dce5c882f460c82bd Mon Sep 17 00:00:00 2001 From: Simon Atanasyan Date: Wed, 27 Nov 2019 19:09:50 +0300 Subject: [PATCH 191/591] [mips] Check that features required by built-ins are enabled Now Clang does not check that features required by built-in functions are enabled. That causes errors in the backend reported in PR44018. This patch fixes this bug by checking that required features are enabled. This should fix PR44018. 
Differential Revision: https://reviews.llvm.org/D70808 --- .../clang/Basic/DiagnosticSemaKinds.td | 6 +++ clang/include/clang/Sema/Sema.h | 2 + clang/lib/Basic/Targets/Mips.cpp | 3 ++ clang/lib/Sema/SemaChecking.cpp | 33 ++++++++++++++++- clang/test/CodeGen/builtins-mips-args.c | 3 +- clang/test/CodeGen/builtins-mips.c | 3 +- clang/test/Sema/builtins-mips-features.c | 37 +++++++++++++++++++ 7 files changed, 83 insertions(+), 4 deletions(-) create mode 100644 clang/test/Sema/builtins-mips-features.c diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 746320fa526b0..c30f65d94581e 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -8742,6 +8742,12 @@ def err_32_bit_builtin_64_bit_tgt : Error< "this builtin is only available on 32-bit targets">; def err_builtin_x64_aarch64_only : Error< "this builtin is only available on x86-64 and aarch64 targets">; +def err_mips_builtin_requires_dsp : Error< + "this builtin requires 'dsp' ASE, please use -mdsp">; +def err_mips_builtin_requires_dspr2 : Error< + "this builtin requires 'dsp r2' ASE, please use -mdspr2">; +def err_mips_builtin_requires_msa : Error< + "this builtin requires 'msa' ASE, please use -mmsa">; def err_ppc_builtin_only_on_pwr7 : Error< "this builtin is only valid on POWER7 or later CPUs">; def err_x86_builtin_invalid_rounding : Error< diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index ac5a4953e00d7..59e8f34396692 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -11282,6 +11282,8 @@ class Sema final { bool CheckHexagonBuiltinCpu(unsigned BuiltinID, CallExpr *TheCall); bool CheckHexagonBuiltinArgument(unsigned BuiltinID, CallExpr *TheCall); bool CheckMipsBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); + bool CheckMipsBuiltinCpu(unsigned BuiltinID, CallExpr *TheCall); + bool 
CheckMipsBuiltinArgument(unsigned BuiltinID, CallExpr *TheCall); bool CheckSystemZBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); bool CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall); bool CheckX86BuiltinGatherScatterScale(unsigned BuiltinID, CallExpr *TheCall); diff --git a/clang/lib/Basic/Targets/Mips.cpp b/clang/lib/Basic/Targets/Mips.cpp index b9ab80df61940..ead5e91f7c8f2 100644 --- a/clang/lib/Basic/Targets/Mips.cpp +++ b/clang/lib/Basic/Targets/Mips.cpp @@ -213,7 +213,10 @@ void MipsTargetInfo::getTargetDefines(const LangOptions &Opts, bool MipsTargetInfo::hasFeature(StringRef Feature) const { return llvm::StringSwitch(Feature) .Case("mips", true) + .Case("dsp", DspRev >= DSP1) + .Case("dspr2", DspRev >= DSP2) .Case("fp64", FPMode == FP64) + .Case("msa", HasMSA) .Default(false); } diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index c19badf80137d..adefca7fe4e78 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3051,8 +3051,37 @@ bool Sema::CheckHexagonBuiltinFunctionCall(unsigned BuiltinID, CheckHexagonBuiltinArgument(BuiltinID, TheCall); } +bool Sema::CheckMipsBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { + return CheckMipsBuiltinCpu(BuiltinID, TheCall) || + CheckMipsBuiltinArgument(BuiltinID, TheCall); +} + +bool Sema::CheckMipsBuiltinCpu(unsigned BuiltinID, CallExpr *TheCall) { + const TargetInfo &TI = Context.getTargetInfo(); -// CheckMipsBuiltinFunctionCall - Checks the constant value passed to the + if (Mips::BI__builtin_mips_addu_qb <= BuiltinID && + BuiltinID <= Mips::BI__builtin_mips_lwx) { + if (!TI.hasFeature("dsp")) + return Diag(TheCall->getBeginLoc(), diag::err_mips_builtin_requires_dsp); + } + + if (Mips::BI__builtin_mips_absq_s_qb <= BuiltinID && + BuiltinID <= Mips::BI__builtin_mips_subuh_r_qb) { + if (!TI.hasFeature("dspr2")) + return Diag(TheCall->getBeginLoc(), + diag::err_mips_builtin_requires_dspr2); + } + + if 
(Mips::BI__builtin_msa_add_a_b <= BuiltinID && + BuiltinID <= Mips::BI__builtin_msa_xori_b) { + if (!TI.hasFeature("msa")) + return Diag(TheCall->getBeginLoc(), diag::err_mips_builtin_requires_msa); + } + + return false; +} + +// CheckMipsBuiltinArgument - Checks the constant value passed to the // intrinsic is correct. The switch statement is ordered by DSP, MSA. The // ordering for DSP is unspecified. MSA is ordered by the data format used // by the underlying instruction i.e., df/m, df/n and then by size. @@ -3061,7 +3090,7 @@ bool Sema::CheckHexagonBuiltinFunctionCall(unsigned BuiltinID, // definitions from include/clang/Basic/BuiltinsMips.def. // FIXME: GCC is strict on signedness for some of these intrinsics, we should // be too. -bool Sema::CheckMipsBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { +bool Sema::CheckMipsBuiltinArgument(unsigned BuiltinID, CallExpr *TheCall) { unsigned i = 0, l = 0, u = 0, m = 0; switch (BuiltinID) { default: return false; diff --git a/clang/test/CodeGen/builtins-mips-args.c b/clang/test/CodeGen/builtins-mips-args.c index cdb42af4a53d1..a135848805aaf 100644 --- a/clang/test/CodeGen/builtins-mips-args.c +++ b/clang/test/CodeGen/builtins-mips-args.c @@ -1,5 +1,6 @@ // REQUIRES: mips-registered-target -// RUN: %clang_cc1 -triple mips-unknown-linux-gnu -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple mips-unknown-linux-gnu -target-feature +dspr2 \ +// RUN: -fsyntax-only -verify %s void foo() { // MIPS DSP Rev 1 diff --git a/clang/test/CodeGen/builtins-mips.c b/clang/test/CodeGen/builtins-mips.c index c6be896e81928..d26f630c35d7d 100644 --- a/clang/test/CodeGen/builtins-mips.c +++ b/clang/test/CodeGen/builtins-mips.c @@ -1,5 +1,6 @@ // REQUIRES: mips-registered-target -// RUN: %clang_cc1 -triple mips-unknown-linux-gnu -emit-llvm %s -o - \ +// RUN: %clang_cc1 -triple mips-unknown-linux-gnu -emit-llvm %s \ +// RUN: -target-feature +dspr2 -o - \ // RUN: | FileCheck %s typedef int q31; diff --git 
a/clang/test/Sema/builtins-mips-features.c b/clang/test/Sema/builtins-mips-features.c new file mode 100644 index 0000000000000..4ea36d7f24dc0 --- /dev/null +++ b/clang/test/Sema/builtins-mips-features.c @@ -0,0 +1,37 @@ +// REQUIRES: mips-registered-target +// RUN: %clang_cc1 -triple mips64 -fsyntax-only -verify %s + +typedef signed char v4i8 __attribute__ ((vector_size(4))); +typedef signed char v4q7 __attribute__ ((vector_size(4))); +typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); +typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); + +void dsp() { + v4i8 a; + void* p; + + // expected-error@+1 {{this builtin requires 'dsp' ASE, please use -mdsp}} + __builtin_mips_addu_qb(a, a); + // expected-error@+1 {{this builtin requires 'dsp' ASE, please use -mdsp}} + __builtin_mips_lwx(p, 32); +} + +void dspr2() { + v4i8 a; + v4q7 b; + + // expected-error@+1 {{this builtin requires 'dsp r2' ASE, please use -mdspr2}} + __builtin_mips_absq_s_qb(b); + // expected-error@+1 {{this builtin requires 'dsp r2' ASE, please use -mdspr2}} + __builtin_mips_subuh_r_qb(a, a); +} + +void msa() { + v16i8 a; + v16u8 b; + + // expected-error@+1 {{this builtin requires 'msa' ASE, please use -mmsa}} + __builtin_msa_add_a_b(a, a); + // expected-error@+1 {{this builtin requires 'msa' ASE, please use -mmsa}} + __builtin_msa_xori_b(b, 5); +} From 674df13b5fa7ffbd273455d547eff4507a2fcaff Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Mon, 25 Nov 2019 21:57:27 -0800 Subject: [PATCH 192/591] [ORC][JITLink] Add support for weak references, and improve handling of static libraries. This patch substantially updates ORCv2's lookup API in order to support weak references, and to better support static archives. Key changes: -- Each symbol being looked for is now associated with a SymbolLookupFlags value. 
If the associated value is SymbolLookupFlags::RequiredSymbol then the symbol must be defined in one of the JITDylibs being searched (or be able to be generated in one of these JITDylibs via an attached definition generator) or the lookup will fail with an error. If the associated value is SymbolLookupFlags::WeaklyReferencedSymbol then the symbol is permitted to be undefined, in which case it will simply not appear in the resulting SymbolMap if the rest of the lookup succeeds. Since lookup now requires these flags for each symbol, the lookup method now takes an instance of a new SymbolLookupSet type rather than a SymbolNameSet. SymbolLookupSet is a vector-backed set of (name, flags) pairs. Clients are responsible for ensuring that the set property (i.e. unique elements) holds, though this is usually simple and SymbolLookupSet provides convenience methods to support this. -- Lookups now have an associated LookupKind value, which is either LookupKind::Static or LookupKind::DLSym. Definition generators can inspect the lookup kind when determining whether or not to generate new definitions. The StaticLibraryDefinitionGenerator is updated to only pull in new objects from the archive if the lookup kind is Static. This allows lookup to be re-used to emulate dlsym for JIT'd symbols without pulling in new objects from archives (which would not happen in a normal dlsym call). -- JITLink is updated to allow externals to be assigned weak linkage, and weak externals now use the SymbolLookupFlags::WeaklyReferencedSymbol value for lookups. Unresolved weak references will be assigned the default value of zero. Since this patch was modifying the lookup API anyway, it also replaces all of the "MatchNonExported" boolean arguments with a "JITDylibLookupFlags" enum for readability. If a JITDylib's associated value is JITDylibLookupFlags::MatchExportedSymbolsOnly then the lookup will only match against exported (non-hidden) symbols in that JITDylib. 
If a JITDylib's associated value is JITDylibLookupFlags::MatchAllSymbols then the lookup will match against any symbol defined in the JITDylib. --- .../llvm/ExecutionEngine/JITLink/JITLink.h | 31 +- llvm/include/llvm/ExecutionEngine/Orc/Core.h | 390 +++++++++-- .../llvm/ExecutionEngine/Orc/ExecutionUtils.h | 13 +- .../llvm/ExecutionEngine/Orc/Speculation.h | 35 +- llvm/lib/ExecutionEngine/JITLink/JITLink.cpp | 10 + .../JITLink/JITLinkGeneric.cpp | 27 +- .../ExecutionEngine/JITLink/JITLinkGeneric.h | 2 +- .../JITLink/MachOLinkGraphBuilder.cpp | 4 +- .../Orc/CompileOnDemandLayer.cpp | 27 +- llvm/lib/ExecutionEngine/Orc/Core.cpp | 637 +++++++++--------- .../ExecutionEngine/Orc/ExecutionUtils.cpp | 84 +-- .../ExecutionEngine/Orc/IndirectionUtils.cpp | 5 +- llvm/lib/ExecutionEngine/Orc/LLJIT.cpp | 4 +- .../lib/ExecutionEngine/Orc/LazyReexports.cpp | 6 +- llvm/lib/ExecutionEngine/Orc/Legacy.cpp | 3 +- .../Orc/ObjectLinkingLayer.cpp | 27 +- .../Orc/RTDyldObjectLinkingLayer.cpp | 13 +- .../JITLink/X86/MachO_weak_references.s | 19 + llvm/tools/llvm-jitlink/llvm-jitlink.cpp | 12 +- .../ExecutionEngine/Orc/CoreAPIsTest.cpp | 198 +++--- .../Orc/LegacyAPIInteropTest.cpp | 9 +- .../Orc/RTDyldObjectLinkingLayerTest.cpp | 11 +- 22 files changed, 1007 insertions(+), 560 deletions(-) create mode 100644 llvm/test/ExecutionEngine/JITLink/X86/MachO_weak_references.s diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h index aebd55563e615..7470cca498068 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h @@ -324,14 +324,14 @@ class Symbol { } static Symbol &constructExternal(void *SymStorage, Addressable &Base, - StringRef Name, JITTargetAddress Size) { + StringRef Name, JITTargetAddress Size, + Linkage L) { assert(SymStorage && "Storage cannot be null"); assert(!Base.isDefined() && "Cannot create external symbol from defined block"); 
assert(!Name.empty() && "External symbol name cannot be empty"); auto *Sym = reinterpret_cast(SymStorage); - new (Sym) Symbol(Base, 0, Name, Size, Linkage::Strong, Scope::Default, - false, false); + new (Sym) Symbol(Base, 0, Name, Size, L, Scope::Default, false, false); return *Sym; } @@ -477,7 +477,7 @@ class Symbol { /// Set the linkage for this Symbol. void setLinkage(Linkage L) { - assert((L == Linkage::Strong || (Base->isDefined() && !Name.empty())) && + assert((L == Linkage::Strong || (!Base->isAbsolute() && !Name.empty())) && "Linkage can only be applied to defined named symbols"); this->L = static_cast(L); } @@ -849,9 +849,14 @@ class LinkGraph { /// Add an external symbol. /// Some formats (e.g. ELF) allow Symbols to have sizes. For Symbols whose /// size is not known, you should substitute '0'. - Symbol &addExternalSymbol(StringRef Name, uint64_t Size) { - auto &Sym = Symbol::constructExternal( - Allocator.Allocate(), createAddressable(0, false), Name, Size); + /// For external symbols Linkage determines whether the symbol must be + /// present during lookup: Externals with strong linkage must be found or + /// an error will be emitted. Externals with weak linkage are permitted to + /// be undefined, in which case they are assigned a value of 0. + Symbol &addExternalSymbol(StringRef Name, uint64_t Size, Linkage L) { + auto &Sym = + Symbol::constructExternal(Allocator.Allocate(), + createAddressable(0, false), Name, Size, L); ExternalSymbols.insert(&Sym); return Sym; } @@ -1189,6 +1194,14 @@ struct PassConfiguration { LinkGraphPassList PostFixupPasses; }; +/// Flags for symbol lookup. +/// +/// FIXME: These basically duplicate orc::SymbolLookupFlags -- We should merge +/// the two types once we have an OrcSupport library. +enum class SymbolLookupFlags { RequiredSymbol, WeaklyReferencedSymbol }; + +raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupFlags &LF); + /// A map of symbol names to resolved addresses. 
using AsyncLookupResult = DenseMap; @@ -1223,6 +1236,8 @@ createLookupContinuation(Continuation Cont) { /// Holds context for a single jitLink invocation. class JITLinkContext { public: + using LookupMap = DenseMap; + /// Destroy a JITLinkContext. virtual ~JITLinkContext(); @@ -1240,7 +1255,7 @@ class JITLinkContext { /// Called by JITLink to resolve external symbols. This method is passed a /// lookup continutation which it must call with a result to continue the /// linking process. - virtual void lookup(const DenseSet &Symbols, + virtual void lookup(const LookupMap &Symbols, std::unique_ptr LC) = 0; /// Called by JITLink once all defined symbols in the graph have been assigned diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index 4f22a4c387966..8f92b7ab76639 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -45,8 +45,11 @@ using VModuleKey = uint64_t; // efficiency). using SymbolNameSet = DenseSet; +/// A vector of symbol names. +using SymbolNameVector = std::vector; + /// A map from symbol names (as SymbolStringPtrs) to JITSymbols -/// (address/flags pairs). +/// (address/flags pairs). using SymbolMap = DenseMap; /// A map from symbol names (as SymbolStringPtrs) to JITSymbolFlags. @@ -55,8 +58,244 @@ using SymbolFlagsMap = DenseMap; /// A map from JITDylibs to sets of symbols. using SymbolDependenceMap = DenseMap; -/// A list of (JITDylib*, bool) pairs. -using JITDylibSearchList = std::vector>; +/// Lookup flags that apply to each dylib in the search order for a lookup. +/// +/// If MatchHiddenSymbolsOnly is used (the default) for a given dylib, then +/// only symbols in that Dylib's interface will be searched. If +/// MatchHiddenSymbols is used then symbols with hidden visibility will match +/// as well. +enum class JITDylibLookupFlags { MatchExportedSymbolsOnly, MatchAllSymbols }; + +/// Lookup flags that apply to each symbol in a lookup. 
+/// +/// If RequiredSymbol is used (the default) for a given symbol then that symbol +/// must be found during the lookup or the lookup will fail returning a +/// SymbolNotFound error. If WeaklyReferencedSymbol is used and the given +/// symbol is not found then the query will continue, and no result for the +/// missing symbol will be present in the result (assuming the rest of the +/// lookup succeeds). +enum class SymbolLookupFlags { RequiredSymbol, WeaklyReferencedSymbol }; + +/// Describes the kind of lookup being performed. The lookup kind is passed to +/// symbol generators (if they're invoked) to help them determine what +/// definitions to generate. +/// +/// Static -- Lookup is being performed as-if at static link time (e.g. +/// generators representing static archives should pull in new +/// definitions). +/// +/// DLSym -- Lookup is being performed as-if at runtime (e.g. generators +/// representing static archives should not pull in new definitions). +enum class LookupKind { Static, DLSym }; + +/// A list of (JITDylib*, JITDylibLookupFlags) pairs to be used as a search +/// order during symbol lookup. +using JITDylibSearchOrder = + std::vector>; + +/// Convenience function for creating a search order from an ArrayRef of +/// JITDylib*, all with the same flags. +inline JITDylibSearchOrder makeJITDylibSearchOrder( + ArrayRef JDs, + JITDylibLookupFlags Flags = JITDylibLookupFlags::MatchExportedSymbolsOnly) { + JITDylibSearchOrder O; + O.reserve(JDs.size()); + for (auto *JD : JDs) + O.push_back(std::make_pair(JD, Flags)); + return O; +} + +/// A set of symbols to look up, each associated with a SymbolLookupFlags +/// value. +/// +/// This class is backed by a vector and optimized for fast insertion, +/// deletion and iteration. It does not guarantee a stable order between +/// operations, and will not automatically detect duplicate elements (they +/// can be manually checked by calling the validate method). 
+class SymbolLookupSet { +public: + using value_type = std::pair; + using UnderlyingVector = std::vector; + using iterator = UnderlyingVector::iterator; + using const_iterator = UnderlyingVector::const_iterator; + + SymbolLookupSet() = default; + + explicit SymbolLookupSet( + SymbolStringPtr Name, + SymbolLookupFlags Flags = SymbolLookupFlags::RequiredSymbol) { + add(std::move(Name), Flags); + } + + /// Construct a SymbolLookupSet from an initializer list of SymbolStringPtrs. + explicit SymbolLookupSet( + std::initializer_list Names, + SymbolLookupFlags Flags = SymbolLookupFlags::RequiredSymbol) { + Symbols.reserve(Names.size()); + for (auto &Name : Names) + add(std::move(Name), Flags); + } + + /// Construct a SymbolLookupSet from a SymbolNameSet with the given + /// Flags used for each value. + explicit SymbolLookupSet( + const SymbolNameSet &Names, + SymbolLookupFlags Flags = SymbolLookupFlags::RequiredSymbol) { + Symbols.reserve(Names.size()); + for (const auto &Name : Names) + add(Name, Flags); + } + + /// Construct a SymbolLookupSet from a vector of symbols with the given Flags + /// used for each value. + /// If the ArrayRef contains duplicates it is up to the client to remove these + /// before using this instance for lookup. + explicit SymbolLookupSet( + ArrayRef Names, + SymbolLookupFlags Flags = SymbolLookupFlags::RequiredSymbol) { + Symbols.reserve(Names.size()); + for (const auto &Name : Names) + add(Name, Flags); + } + + /// Add an element to the set. The client is responsible for checking that + /// duplicates are not added. 
+ void add(SymbolStringPtr Name, + SymbolLookupFlags Flags = SymbolLookupFlags::RequiredSymbol) { + Symbols.push_back(std::make_pair(std::move(Name), Flags)); + } + + bool empty() const { return Symbols.empty(); } + UnderlyingVector::size_type size() const { return Symbols.size(); } + iterator begin() { return Symbols.begin(); } + iterator end() { return Symbols.end(); } + const_iterator begin() const { return Symbols.begin(); } + const_iterator end() const { return Symbols.end(); } + + /// Removes the Ith element of the vector, replacing it with the last element. + void remove(UnderlyingVector::size_type I) { + std::swap(Symbols[I], Symbols.back()); + Symbols.pop_back(); + } + + /// Removes the element pointed to by the given iterator. This iterator and + /// all subsequent ones (including end()) are invalidated. + void remove(iterator I) { remove(I - begin()); } + + /// Removes all elements matching the given predicate, which must be callable + /// as bool(const SymbolStringPtr &, SymbolLookupFlags Flags). + template void remove_if(PredFn &&Pred) { + UnderlyingVector::size_type I = 0; + while (I != Symbols.size()) { + const auto &Name = Symbols[I].first; + auto Flags = Symbols[I].second; + if (Pred(Name, Flags)) + remove(I); + else + ++I; + } + } + + /// Loop over the elements of this SymbolLookupSet, applying the Body function + /// to each one. Body must be callable as + /// bool(const SymbolStringPtr &, SymbolLookupFlags). + /// If Body returns true then the element just passed in is removed from the + /// set. If Body returns false then the element is retained. 
+ template + auto forEachWithRemoval(BodyFn &&Body) -> typename std::enable_if< + std::is_same(), + std::declval())), + bool>::value>::type { + UnderlyingVector::size_type I = 0; + while (I != Symbols.size()) { + const auto &Name = Symbols[I].first; + auto Flags = Symbols[I].second; + if (Body(Name, Flags)) + remove(I); + else + ++I; + } + } + + /// Loop over the elements of this SymbolLookupSet, applying the Body function + /// to each one. Body must be callable as + /// Expected(const SymbolStringPtr &, SymbolLookupFlags). + /// If Body returns a failure value, the loop exits immediately. If Body + /// returns true then the element just passed in is removed from the set. If + /// Body returns false then the element is retained. + template + auto forEachWithRemoval(BodyFn &&Body) -> typename std::enable_if< + std::is_same(), + std::declval())), + Expected>::value, + Error>::type { + UnderlyingVector::size_type I = 0; + while (I != Symbols.size()) { + const auto &Name = Symbols[I].first; + auto Flags = Symbols[I].second; + auto Remove = Body(Name, Flags); + if (!Remove) + return Remove.takeError(); + if (*Remove) + remove(I); + else + ++I; + } + return Error::success(); + } + + /// Construct a SymbolNameVector from this instance by dropping the Flags + /// values. + SymbolNameVector getSymbolNames() const { + SymbolNameVector Names; + Names.reserve(Symbols.size()); + for (auto &KV : Symbols) + Names.push_back(KV.first); + return Names; + } + + /// Sort the lookup set by pointer value. This sort is fast but sensitive to + /// allocation order and so should not be used where a consistent order is + /// required. + void sortByAddress() { + llvm::sort(Symbols, [](const value_type &LHS, const value_type &RHS) { + return LHS.first < RHS.first; + }); + } + + /// Sort the lookup set lexicographically. This sort is slow but the order + /// is unaffected by allocation order. 
+ void sortByName() { + llvm::sort(Symbols, [](const value_type &LHS, const value_type &RHS) { + return *LHS.first < *RHS.first; + }); + } + + /// Remove any duplicate elements. If a SymbolLookupSet is not duplicate-free + /// by construction, this method can be used to turn it into a proper set. + void removeDuplicates() { + sortByAddress(); + auto LastI = std::unique(Symbols.begin(), Symbols.end()); + Symbols.erase(LastI, Symbols.end()); + } + +#ifndef NDEBUG + /// Returns true if this set contains any duplicates. This should only be used + /// in assertions. + bool containsDuplicates() { + if (Symbols.size() < 2) + return false; + sortByAddress(); + for (UnderlyingVector::size_type I = 1; I != Symbols.size(); ++I) + if (Symbols[I].first == Symbols[I - 1].first) + return true; + return true; + } +#endif + +private: + UnderlyingVector Symbols; +}; struct SymbolAliasMapEntry { SymbolAliasMapEntry() = default; @@ -76,6 +315,9 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPtr &Sym); /// Render a SymbolNameSet. raw_ostream &operator<<(raw_ostream &OS, const SymbolNameSet &Symbols); +/// Render a SymbolNameVector. +raw_ostream &operator<<(raw_ostream &OS, const SymbolNameVector &Symbols); + /// Render a SymbolFlagsMap entry. raw_ostream &operator<<(raw_ostream &OS, const SymbolFlagsMap::value_type &KV); @@ -98,8 +340,25 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolDependenceMap &Deps); /// Render a MaterializationUnit. raw_ostream &operator<<(raw_ostream &OS, const MaterializationUnit &MU); -/// Render a JITDylibSearchList. -raw_ostream &operator<<(raw_ostream &OS, const JITDylibSearchList &JDs); +//// Render a JITDylibLookupFlags instance. +raw_ostream &operator<<(raw_ostream &OS, + const JITDylibLookupFlags &JDLookupFlags); + +/// Rendar a SymbolLookupFlags instance. +raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupFlags &LookupFlags); + +/// Render a JITDylibLookupFlags instance. 
+raw_ostream &operator<<(raw_ostream &OS, const LookupKind &K); + +/// Render a SymbolLookupSet entry. +raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupSet::value_type &KV); + +/// Render a SymbolLookupSet. +raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupSet &LookupSet); + +/// Render a JITDylibSearchOrder. +raw_ostream &operator<<(raw_ostream &OS, + const JITDylibSearchOrder &SearchOrder); /// Render a SymbolAliasMap. raw_ostream &operator<<(raw_ostream &OS, const SymbolAliasMap &Aliases); @@ -107,6 +366,9 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolAliasMap &Aliases); /// Render a SymbolState. raw_ostream &operator<<(raw_ostream &OS, const SymbolState &S); +/// Render a LookupKind. +raw_ostream &operator<<(raw_ostream &OS, const LookupKind &K); + /// Callback to notify client that symbols have been resolved. using SymbolsResolvedCallback = unique_function)>; @@ -139,12 +401,13 @@ class SymbolsNotFound : public ErrorInfo { static char ID; SymbolsNotFound(SymbolNameSet Symbols); + SymbolsNotFound(SymbolNameVector Symbols); std::error_code convertToErrorCode() const override; void log(raw_ostream &OS) const override; - const SymbolNameSet &getSymbols() const { return Symbols; } + const SymbolNameVector &getSymbols() const { return Symbols; } private: - SymbolNameSet Symbols; + SymbolNameVector Symbols; }; /// Used to notify clients that a set of symbols could not be removed. @@ -376,7 +639,8 @@ class ReExportsMaterializationUnit : public MaterializationUnit { /// Note: Care must be taken that no sets of aliases form a cycle, as such /// a cycle will result in a deadlock when any symbol in the cycle is /// resolved. 
- ReExportsMaterializationUnit(JITDylib *SourceJD, bool MatchNonExported, + ReExportsMaterializationUnit(JITDylib *SourceJD, + JITDylibLookupFlags SourceJDLookupFlags, SymbolAliasMap Aliases, VModuleKey K); StringRef getName() const override; @@ -387,7 +651,7 @@ class ReExportsMaterializationUnit : public MaterializationUnit { static SymbolFlagsMap extractFlags(const SymbolAliasMap &Aliases); JITDylib *SourceJD = nullptr; - bool MatchNonExported = false; + JITDylibLookupFlags SourceJDLookupFlags; SymbolAliasMap Aliases; }; @@ -405,25 +669,26 @@ class ReExportsMaterializationUnit : public MaterializationUnit { inline std::unique_ptr symbolAliases(SymbolAliasMap Aliases, VModuleKey K = VModuleKey()) { return std::make_unique( - nullptr, true, std::move(Aliases), std::move(K)); + nullptr, JITDylibLookupFlags::MatchAllSymbols, std::move(Aliases), + std::move(K)); } /// Create a materialization unit for re-exporting symbols from another JITDylib /// with alternative names/flags. -/// If MatchNonExported is true then non-exported symbols from SourceJD can be -/// re-exported. If it is false, attempts to re-export a non-exported symbol -/// will result in a "symbol not found" error. +/// SourceJD will be searched using the given JITDylibLookupFlags. inline std::unique_ptr reexports(JITDylib &SourceJD, SymbolAliasMap Aliases, - bool MatchNonExported = false, VModuleKey K = VModuleKey()) { + JITDylibLookupFlags SourceJDLookupFlags = + JITDylibLookupFlags::MatchExportedSymbolsOnly, + VModuleKey K = VModuleKey()) { return std::make_unique( - &SourceJD, MatchNonExported, std::move(Aliases), std::move(K)); + &SourceJD, SourceJDLookupFlags, std::move(Aliases), std::move(K)); } /// Build a SymbolAliasMap for the common case where you want to re-export /// symbols from another JITDylib with the same linkage/flags. 
Expected -buildSimpleReexportsAliasMap(JITDylib &SourceJD, const SymbolNameSet &Symbols); +buildSimpleReexportsAAliasMap(JITDylib &SourceJD, const SymbolNameSet &Symbols); /// Represents the state that a symbol has reached during materialization. enum class SymbolState : uint8_t { @@ -448,7 +713,7 @@ class AsynchronousSymbolQuery { /// Create a query for the given symbols. The NotifyComplete /// callback will be called once all queried symbols reach the given /// minimum state. - AsynchronousSymbolQuery(const SymbolNameSet &Symbols, + AsynchronousSymbolQuery(const SymbolLookupSet &Symbols, SymbolState RequiredState, SymbolsResolvedCallback NotifyComplete); @@ -456,6 +721,15 @@ class AsynchronousSymbolQuery { void notifySymbolMetRequiredState(const SymbolStringPtr &Name, JITEvaluatedSymbol Sym); + /// Remove a symbol from the query. This is used to drop weakly referenced + /// symbols that are not found. + void dropSymbol(const SymbolStringPtr &Name) { + assert(ResolvedSymbols.count(Name) && + "Redundant removal of weakly-referenced symbol"); + ResolvedSymbols.erase(Name); + --OutstandingSymbolsCount; + } + /// Returns true if all symbols covered by this query have been /// resolved. bool isComplete() const { return OutstandingSymbolsCount == 0; } @@ -497,11 +771,21 @@ class JITDylib { friend class ExecutionSession; friend class MaterializationResponsibility; public: + /// Definition generators can be attached to JITDylibs to generate new + /// definitions for otherwise unresolved symbols during lookup. class DefinitionGenerator { public: virtual ~DefinitionGenerator(); - virtual Expected - tryToGenerate(JITDylib &Parent, const SymbolNameSet &Names) = 0; + + /// DefinitionGenerators should override this method to insert new + /// definitions into the parent JITDylib. K specifies the kind of this + /// lookup. JD specifies the target JITDylib being searched, and + /// JDLookupFlags specifies whether the search should match against + /// hidden symbols. 
Finally, Symbols describes the set of unresolved + /// symbols and their associated lookup flags. + virtual Error tryToGenerate(LookupKind K, JITDylib &JD, + JITDylibLookupFlags JDLookupFlags, + const SymbolLookupSet &LookupSet) = 0; }; using AsynchronousSymbolQuerySet = @@ -552,18 +836,20 @@ class JITDylib { /// as the first in the search order (instead of this dylib) ensures that /// definitions within this dylib resolve to the lazy-compiling stubs, /// rather than immediately materializing the definitions in this dylib. - void setSearchOrder(JITDylibSearchList NewSearchOrder, - bool SearchThisJITDylibFirst = true, - bool MatchNonExportedInThisDylib = true); + void setSearchOrder(JITDylibSearchOrder NewSearchOrder, + bool SearchThisJITDylibFirst = true); /// Add the given JITDylib to the search order for definitions in this /// JITDylib. - void addToSearchOrder(JITDylib &JD, bool MatcNonExported = false); + void addToSearchOrder(JITDylib &JD, + JITDylibLookupFlags JDLookupFlags = + JITDylibLookupFlags::MatchExportedSymbolsOnly); /// Replace OldJD with NewJD in the search order if OldJD is present. /// Otherwise this operation is a no-op. void replaceInSearchOrder(JITDylib &OldJD, JITDylib &NewJD, - bool MatchNonExported = false); + JITDylibLookupFlags JDLookupFlags = + JITDylibLookupFlags::MatchExportedSymbolsOnly); /// Remove the given JITDylib from the search order for this JITDylib if it is /// present. Otherwise this operation is a no-op. @@ -572,7 +858,7 @@ class JITDylib { /// Do something with the search order (run under the session lock). template auto withSearchOrderDo(Func &&F) - -> decltype(F(std::declval())); + -> decltype(F(std::declval())); /// Define all symbols provided by the materialization unit to be part of this /// JITDylib. @@ -605,8 +891,11 @@ class JITDylib { Error remove(const SymbolNameSet &Names); /// Search the given JITDylib for the symbols in Symbols. If found, store - /// the flags for each symbol in Flags. 
Returns any unresolved symbols. - Expected lookupFlags(const SymbolNameSet &Names); + /// the flags for each symbol in Flags. If any required symbols are not found + /// then an error will be returned. + Expected lookupFlags(LookupKind K, + JITDylibLookupFlags JDLookupFlags, + SymbolLookupSet LookupSet); /// Dump current JITDylib state to OS. void dump(raw_ostream &OS); @@ -709,20 +998,23 @@ class JITDylib { Error defineImpl(MaterializationUnit &MU); - Expected lookupFlagsImpl(SymbolFlagsMap &Flags, - const SymbolNameSet &Names); + void lookupFlagsImpl(SymbolFlagsMap &Result, LookupKind K, + JITDylibLookupFlags JDLookupFlags, + SymbolLookupSet &Unresolved); - Error lodgeQuery(std::shared_ptr &Q, - SymbolNameSet &Unresolved, bool MatchNonExported, - MaterializationUnitList &MUs); + Error lodgeQuery(MaterializationUnitList &MUs, + std::shared_ptr &Q, LookupKind K, + JITDylibLookupFlags JDLookupFlags, + SymbolLookupSet &Unresolved); - Error lodgeQueryImpl(std::shared_ptr &Q, - SymbolNameSet &Unresolved, bool MatchNonExported, - MaterializationUnitList &MUs); + Error lodgeQueryImpl(MaterializationUnitList &MUs, + std::shared_ptr &Q, + LookupKind K, JITDylibLookupFlags JDLookupFlags, + SymbolLookupSet &Unresolved); bool lookupImpl(std::shared_ptr &Q, std::vector> &MUs, - SymbolNameSet &Unresolved); + SymbolLookupSet &Unresolved); void detachQueryHelper(AsynchronousSymbolQuery &Q, const SymbolNameSet &QuerySymbols); @@ -754,7 +1046,7 @@ class JITDylib { UnmaterializedInfosMap UnmaterializedInfos; MaterializingInfosMap MaterializingInfos; std::vector> DefGenerators; - JITDylibSearchList SearchOrder; + JITDylibSearchOrder SearchOrder; }; /// An ExecutionSession represents a running JIT program. @@ -863,8 +1155,9 @@ class ExecutionSession { /// dependenant symbols for this query (e.g. it is being made by a top level /// client to get an address to call) then the value NoDependenciesToRegister /// can be used. 
- void lookup(const JITDylibSearchList &SearchOrder, SymbolNameSet Symbols, - SymbolState RequiredState, SymbolsResolvedCallback NotifyComplete, + void lookup(LookupKind K, const JITDylibSearchOrder &SearchOrder, + SymbolLookupSet Symbols, SymbolState RequiredState, + SymbolsResolvedCallback NotifyComplete, RegisterDependenciesFunction RegisterDependencies); /// Blocking version of lookup above. Returns the resolved symbol map. @@ -874,8 +1167,9 @@ class ExecutionSession { /// or an error occurs. If WaitUntilReady is false and an error occurs /// after resolution, the function will return a success value, but the /// error will be reported via reportErrors. - Expected lookup(const JITDylibSearchList &SearchOrder, - const SymbolNameSet &Symbols, + Expected lookup(const JITDylibSearchOrder &SearchOrder, + const SymbolLookupSet &Symbols, + LookupKind K = LookupKind::Static, SymbolState RequiredState = SymbolState::Ready, RegisterDependenciesFunction RegisterDependencies = NoDependenciesToRegister); @@ -883,7 +1177,7 @@ class ExecutionSession { /// Convenience version of blocking lookup. /// Searches each of the JITDylibs in the search order in turn for the given /// symbol. - Expected lookup(const JITDylibSearchList &SearchOrder, + Expected lookup(const JITDylibSearchOrder &SearchOrder, SymbolStringPtr Symbol); /// Convenience version of blocking lookup. @@ -951,7 +1245,7 @@ GeneratorT &JITDylib::addGenerator(std::unique_ptr DefGenerator) { template auto JITDylib::withSearchOrderDo(Func &&F) - -> decltype(F(std::declval())) { + -> decltype(F(std::declval())) { return ES.runSessionLocked([&]() { return F(SearchOrder); }); } @@ -997,15 +1291,17 @@ class ReexportsGenerator : public JITDylib::DefinitionGenerator { /// Create a reexports generator. If an Allow predicate is passed, only /// symbols for which the predicate returns true will be reexported. If no /// Allow predicate is passed, all symbols will be exported. 
- ReexportsGenerator(JITDylib &SourceJD, bool MatchNonExported = false, + ReexportsGenerator(JITDylib &SourceJD, + JITDylibLookupFlags SourceJDLookupFlags, SymbolPredicate Allow = SymbolPredicate()); - Expected tryToGenerate(JITDylib &JD, - const SymbolNameSet &Names) override; + Error tryToGenerate(LookupKind K, JITDylib &JD, + JITDylibLookupFlags JDLookupFlags, + const SymbolLookupSet &LookupSet) override; private: JITDylib &SourceJD; - bool MatchNonExported = false; + JITDylibLookupFlags SourceJDLookupFlags; SymbolPredicate Allow; }; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h index b9bbace6f6308..7d051ed990635 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h @@ -268,8 +268,9 @@ class DynamicLibrarySearchGenerator : public JITDylib::DefinitionGenerator { return Load(nullptr, GlobalPrefix, std::move(Allow)); } - Expected tryToGenerate(JITDylib &JD, - const SymbolNameSet &Names) override; + Error tryToGenerate(LookupKind K, JITDylib &JD, + JITDylibLookupFlags JDLookupFlags, + const SymbolLookupSet &Symbols) override; private: sys::DynamicLibrary Dylib; @@ -297,8 +298,9 @@ class StaticLibraryDefinitionGenerator : public JITDylib::DefinitionGenerator { static Expected> Create(ObjectLayer &L, std::unique_ptr ArchiveBuffer); - Expected tryToGenerate(JITDylib &JD, - const SymbolNameSet &Names) override; + Error tryToGenerate(LookupKind K, JITDylib &JD, + JITDylibLookupFlags JDLookupFlags, + const SymbolLookupSet &Symbols) override; private: StaticLibraryDefinitionGenerator(ObjectLayer &L, @@ -307,8 +309,7 @@ class StaticLibraryDefinitionGenerator : public JITDylib::DefinitionGenerator { ObjectLayer &L; std::unique_ptr ArchiveBuffer; - object::Archive Archive; - size_t UnrealizedObjects = 0; + std::unique_ptr Archive; }; } // end namespace orc diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h 
b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h index 766a6b070f12f..f6b86bb231678 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h @@ -100,23 +100,27 @@ class Speculator { SymbolsInJD.insert(ImplSymbolName); } - DEBUG_WITH_TYPE("orc", for (auto &I - : SpeculativeLookUpImpls) { - llvm::dbgs() << "\n In " << I.first->getName() << " JITDylib "; - for (auto &N : I.second) - llvm::dbgs() << "\n Likely Symbol : " << N; + DEBUG_WITH_TYPE("orc", { + for (auto &I : SpeculativeLookUpImpls) { + llvm::dbgs() << "\n In " << I.first->getName() << " JITDylib "; + for (auto &N : I.second) + llvm::dbgs() << "\n Likely Symbol : " << N; + } }); // for a given symbol, there may be no symbol qualified for speculatively // compile try to fix this before jumping to this code if possible. for (auto &LookupPair : SpeculativeLookUpImpls) - ES.lookup(JITDylibSearchList({{LookupPair.first, true}}), - LookupPair.second, SymbolState::Ready, - [this](Expected Result) { - if (auto Err = Result.takeError()) - ES.reportError(std::move(Err)); - }, - NoDependenciesToRegister); + ES.lookup( + LookupKind::Static, + makeJITDylibSearchOrder(LookupPair.first, + JITDylibLookupFlags::MatchAllSymbols), + SymbolLookupSet(LookupPair.second), SymbolState::Ready, + [this](Expected Result) { + if (auto Err = Result.takeError()) + ES.reportError(std::move(Err)); + }, + NoDependenciesToRegister); } public: @@ -151,8 +155,11 @@ class Speculator { this->getES().reportError(ReadySymbol.takeError()); }; // Include non-exported symbols also. 
- ES.lookup(JITDylibSearchList({{JD, true}}), SymbolNameSet({Target}), - SymbolState::Ready, OnReadyFixUp, NoDependenciesToRegister); + ES.lookup( + LookupKind::Static, + makeJITDylibSearchOrder(JD, JITDylibLookupFlags::MatchAllSymbols), + SymbolLookupSet(Target, SymbolLookupFlags::WeaklyReferencedSymbol), + SymbolState::Ready, OnReadyFixUp, NoDependenciesToRegister); } } diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp index 9df79670d9fba..6c924f8895776 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp @@ -266,6 +266,16 @@ void LinkGraph::dump(raw_ostream &OS, << "\n"; } +raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupFlags &LF) { + switch (LF) { + case SymbolLookupFlags::RequiredSymbol: + return OS << "RequiredSymbol"; + case SymbolLookupFlags::WeaklyReferencedSymbol: + return OS << "WeaklyReferencedSymbol"; + } + llvm_unreachable("Unrecognized lookup flags"); +} + void JITLinkAsyncLookupContinuation::anchor() {} JITLinkContext::~JITLinkContext() {} diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp index 9707b9624d936..7b594fd2c0ea9 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp @@ -257,25 +257,35 @@ Error JITLinkerBase::allocateSegments(const SegmentLayoutMap &Layout) { return Error::success(); } -DenseSet JITLinkerBase::getExternalSymbolNames() const { +JITLinkContext::LookupMap JITLinkerBase::getExternalSymbolNames() const { // Identify unresolved external symbols. 
- DenseSet UnresolvedExternals; + JITLinkContext::LookupMap UnresolvedExternals; for (auto *Sym : G->external_symbols()) { assert(Sym->getAddress() == 0 && "External has already been assigned an address"); assert(Sym->getName() != StringRef() && Sym->getName() != "" && "Externals must be named"); - UnresolvedExternals.insert(Sym->getName()); + SymbolLookupFlags LookupFlags = + Sym->getLinkage() == Linkage::Weak + ? SymbolLookupFlags::WeaklyReferencedSymbol + : SymbolLookupFlags::RequiredSymbol; + UnresolvedExternals[Sym->getName()] = LookupFlags; } return UnresolvedExternals; } void JITLinkerBase::applyLookupResult(AsyncLookupResult Result) { for (auto *Sym : G->external_symbols()) { + assert(Sym->getOffset() == 0 && + "External symbol is not at the start of its addressable block"); assert(Sym->getAddress() == 0 && "Symbol already resolved"); assert(!Sym->isDefined() && "Symbol being resolved is already defined"); - assert(Result.count(Sym->getName()) && "Missing resolution for symbol"); - Sym->getAddressable().setAddress(Result[Sym->getName()].getAddress()); + auto ResultI = Result.find(Sym->getName()); + if (ResultI != Result.end()) + Sym->getAddressable().setAddress(ResultI->second.getAddress()); + else + assert(Sym->getLinkage() == Linkage::Weak && + "Failed to resolve non-weak reference"); } LLVM_DEBUG({ @@ -285,8 +295,11 @@ void JITLinkerBase::applyLookupResult(AsyncLookupResult Result) { << formatv("{0:x16}", Sym->getAddress()) << "\n"; }); assert(llvm::all_of(G->external_symbols(), - [](Symbol *Sym) { return Sym->getAddress() != 0; }) && - "All symbols should have been resolved by this point"); + [](Symbol *Sym) { + return Sym->getAddress() != 0 || + Sym->getLinkage() == Linkage::Weak; + }) && + "All strong external symbols should have been resolved by now"); } void JITLinkerBase::deallocateAndBailOut(Error Err) { diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h index 
07dee6cee2002..d5687b7afc967 100644 --- a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h +++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h @@ -106,7 +106,7 @@ class JITLinkerBase { SegmentLayoutMap layOutBlocks(); Error allocateSegments(const SegmentLayoutMap &Layout); - DenseSet getExternalSymbolNames() const; + JITLinkContext::LookupMap getExternalSymbolNames() const; void applyLookupResult(AsyncLookupResult LR); void deallocateAndBailOut(Error Err); diff --git a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp index c1dc138ee7024..1881bd0b287e0 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp @@ -321,7 +321,9 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() { return make_error("Anonymous external symbol at " "index " + Twine(KV.first)); - NSym.GraphSymbol = &G->addExternalSymbol(*NSym.Name, 0); + NSym.GraphSymbol = &G->addExternalSymbol( + *NSym.Name, 0, + NSym.Desc & MachO::N_WEAK_REF ? 
Linkage::Weak : Linkage::Strong); } break; case MachO::N_ABS: diff --git a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp index 75ddbc30445d2..b8e4292202783 100644 --- a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp @@ -162,7 +162,8 @@ void CompileOnDemandLayer::emit(MaterializationResponsibility R, return; } - R.replace(reexports(PDR.getImplDylib(), std::move(NonCallables), true)); + R.replace(reexports(PDR.getImplDylib(), std::move(NonCallables), + JITDylibLookupFlags::MatchAllSymbols)); R.replace(lazyReexports(LCTMgr, PDR.getISManager(), PDR.getImplDylib(), std::move(Callables), AliaseeImpls)); } @@ -173,16 +174,20 @@ CompileOnDemandLayer::getPerDylibResources(JITDylib &TargetD) { if (I == DylibResources.end()) { auto &ImplD = getExecutionSession().createJITDylib( TargetD.getName() + ".impl", false); - TargetD.withSearchOrderDo([&](const JITDylibSearchList &TargetSearchOrder) { - auto NewSearchOrder = TargetSearchOrder; - assert(!NewSearchOrder.empty() && - NewSearchOrder.front().first == &TargetD && - NewSearchOrder.front().second == true && - "TargetD must be at the front of its own search order and match " - "non-exported symbol"); - NewSearchOrder.insert(std::next(NewSearchOrder.begin()), {&ImplD, true}); - ImplD.setSearchOrder(std::move(NewSearchOrder), false); - }); + TargetD.withSearchOrderDo( + [&](const JITDylibSearchOrder &TargetSearchOrder) { + auto NewSearchOrder = TargetSearchOrder; + assert( + !NewSearchOrder.empty() && + NewSearchOrder.front().first == &TargetD && + NewSearchOrder.front().second == + JITDylibLookupFlags::MatchAllSymbols && + "TargetD must be at the front of its own search order and match " + "non-exported symbol"); + NewSearchOrder.insert(std::next(NewSearchOrder.begin()), + {&ImplD, JITDylibLookupFlags::MatchAllSymbols}); + ImplD.setSearchOrder(std::move(NewSearchOrder), false); + }); 
PerDylibResources PDR(ImplD, BuildIndirectStubsManager()); I = DylibResources.insert(std::make_pair(&TargetD, std::move(PDR))).first; } diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index 9e024ba0f10f8..0814ec1c5f8c4 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/ExecutionEngine/Orc/Core.h" + +#include "llvm/ADT/STLExtras.h" #include "llvm/Config/llvm-config.h" #include "llvm/ExecutionEngine/Orc/OrcError.h" #include "llvm/IR/Mangler.h" @@ -77,16 +79,19 @@ bool flagsMatchCLOpts(const JITSymbolFlags &Flags) { #endif // NDEBUG } -// Prints a set of items, filtered by an user-supplied predicate. -template > -class SetPrinter { +// Prints a sequence of items, filtered by an user-supplied predicate. +template > +class SequencePrinter { public: - SetPrinter(const Set &S, Pred ShouldPrint = Pred()) - : S(S), ShouldPrint(std::move(ShouldPrint)) {} + SequencePrinter(const Sequence &S, char OpenSeq, char CloseSeq, + Pred ShouldPrint = Pred()) + : S(S), OpenSeq(OpenSeq), CloseSeq(CloseSeq), + ShouldPrint(std::move(ShouldPrint)) {} void printTo(llvm::raw_ostream &OS) const { bool PrintComma = false; - OS << "{"; + OS << OpenSeq; for (auto &E : S) { if (ShouldPrint(E)) { if (PrintComma) @@ -95,23 +100,26 @@ class SetPrinter { PrintComma = true; } } - OS << " }"; + OS << ' ' << CloseSeq; } private: - const Set &S; + const Sequence &S; + char OpenSeq; + char CloseSeq; mutable Pred ShouldPrint; }; -template -SetPrinter printSet(const Set &S, Pred P = Pred()) { - return SetPrinter(S, std::move(P)); +template +SequencePrinter printSequence(const Sequence &S, char OpenSeq, + char CloseSeq, Pred P = Pred()) { + return SequencePrinter(S, OpenSeq, CloseSeq, std::move(P)); } -// Render a SetPrinter by delegating to its printTo method. 
-template +// Render a SequencePrinter by delegating to its printTo method. +template llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, - const SetPrinter &Printer) { + const SequencePrinter &Printer) { Printer.printTo(OS); return OS; } @@ -147,7 +155,11 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPtr &Sym) { } raw_ostream &operator<<(raw_ostream &OS, const SymbolNameSet &Symbols) { - return OS << printSet(Symbols, PrintAll()); + return OS << printSequence(Symbols, '{', '}', PrintAll()); +} + +raw_ostream &operator<<(raw_ostream &OS, const SymbolNameVector &Symbols) { + return OS << printSequence(Symbols, '[', ']', PrintAll()); } raw_ostream &operator<<(raw_ostream &OS, const JITSymbolFlags &Flags) { @@ -182,11 +194,13 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolMap::value_type &KV) { } raw_ostream &operator<<(raw_ostream &OS, const SymbolFlagsMap &SymbolFlags) { - return OS << printSet(SymbolFlags, PrintSymbolFlagsMapElemsMatchingCLOpts()); + return OS << printSequence(SymbolFlags, '{', '}', + PrintSymbolFlagsMapElemsMatchingCLOpts()); } raw_ostream &operator<<(raw_ostream &OS, const SymbolMap &Symbols) { - return OS << printSet(Symbols, PrintSymbolMapElemsMatchingCLOpts()); + return OS << printSequence(Symbols, '{', '}', + PrintSymbolMapElemsMatchingCLOpts()); } raw_ostream &operator<<(raw_ostream &OS, @@ -195,7 +209,8 @@ raw_ostream &operator<<(raw_ostream &OS, } raw_ostream &operator<<(raw_ostream &OS, const SymbolDependenceMap &Deps) { - return OS << printSet(Deps, PrintAll()); + return OS << printSequence(Deps, '{', '}', + PrintAll()); } raw_ostream &operator<<(raw_ostream &OS, const MaterializationUnit &MU) { @@ -205,16 +220,59 @@ raw_ostream &operator<<(raw_ostream &OS, const MaterializationUnit &MU) { return OS << ")"; } -raw_ostream &operator<<(raw_ostream &OS, const JITDylibSearchList &JDs) { +raw_ostream &operator<<(raw_ostream &OS, const LookupKind &K) { + switch (K) { + case LookupKind::Static: + return OS << "Static"; 
+ case LookupKind::DLSym: + return OS << "DLSym"; + } + llvm_unreachable("Invalid lookup kind"); +} + +raw_ostream &operator<<(raw_ostream &OS, + const JITDylibLookupFlags &JDLookupFlags) { + switch (JDLookupFlags) { + case JITDylibLookupFlags::MatchExportedSymbolsOnly: + return OS << "MatchExportedSymbolsOnly"; + case JITDylibLookupFlags::MatchAllSymbols: + return OS << "MatchAllSymbols"; + } + llvm_unreachable("Invalid JITDylib lookup flags"); +} + +raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupFlags &LookupFlags) { + switch (LookupFlags) { + case SymbolLookupFlags::RequiredSymbol: + return OS << "RequiredSymbol"; + case SymbolLookupFlags::WeaklyReferencedSymbol: + return OS << "WeaklyReferencedSymbol"; + } + llvm_unreachable("Invalid symbol lookup flags"); +} + +raw_ostream &operator<<(raw_ostream &OS, + const SymbolLookupSet::value_type &KV) { + return OS << "(" << KV.first << ", " << KV.second << ")"; +} + +raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupSet &LookupSet) { + return OS << printSequence(LookupSet, '{', '}', + PrintAll()); +} + +raw_ostream &operator<<(raw_ostream &OS, + const JITDylibSearchOrder &SearchOrder) { OS << "["; - if (!JDs.empty()) { - assert(JDs.front().first && "JITDylibList entries must not be null"); - OS << " (\"" << JDs.front().first->getName() << "\", " - << (JDs.front().second ? "true" : "false") << ")"; - for (auto &KV : make_range(std::next(JDs.begin()), JDs.end())) { + if (!SearchOrder.empty()) { + assert(SearchOrder.front().first && + "JITDylibList entries must not be null"); + OS << " (\"" << SearchOrder.front().first->getName() << "\", " + << SearchOrder.begin()->second << ")"; + for (auto &KV : + make_range(std::next(SearchOrder.begin(), 1), SearchOrder.end())) { assert(KV.first && "JITDylibList entries must not be null"); - OS << ", (\"" << KV.first->getName() << "\", " - << (KV.second ? 
"true" : "false") << ")"; + OS << ", (\"" << KV.first->getName() << "\", " << KV.second << ")"; } } OS << " ]"; @@ -262,7 +320,13 @@ void FailedToMaterialize::log(raw_ostream &OS) const { OS << "Failed to materialize symbols: " << *Symbols; } -SymbolsNotFound::SymbolsNotFound(SymbolNameSet Symbols) +SymbolsNotFound::SymbolsNotFound(SymbolNameSet Symbols) { + for (auto &Sym : Symbols) + this->Symbols.push_back(Sym); + assert(!this->Symbols.empty() && "Can not fail to resolve an empty set"); +} + +SymbolsNotFound::SymbolsNotFound(SymbolNameVector Symbols) : Symbols(std::move(Symbols)) { assert(!this->Symbols.empty() && "Can not fail to resolve an empty set"); } @@ -289,7 +353,7 @@ void SymbolsCouldNotBeRemoved::log(raw_ostream &OS) const { } AsynchronousSymbolQuery::AsynchronousSymbolQuery( - const SymbolNameSet &Symbols, SymbolState RequiredState, + const SymbolLookupSet &Symbols, SymbolState RequiredState, SymbolsResolvedCallback NotifyComplete) : NotifyComplete(std::move(NotifyComplete)), RequiredState(RequiredState) { assert(RequiredState >= SymbolState::Resolved && @@ -298,8 +362,8 @@ AsynchronousSymbolQuery::AsynchronousSymbolQuery( OutstandingSymbolsCount = Symbols.size(); - for (auto &S : Symbols) - ResolvedSymbols[S] = nullptr; + for (auto &KV : Symbols) + ResolvedSymbols[KV.first] = nullptr; } void AsynchronousSymbolQuery::notifySymbolMetRequiredState( @@ -511,10 +575,10 @@ AbsoluteSymbolsMaterializationUnit::extractFlags(const SymbolMap &Symbols) { } ReExportsMaterializationUnit::ReExportsMaterializationUnit( - JITDylib *SourceJD, bool MatchNonExported, SymbolAliasMap Aliases, - VModuleKey K) + JITDylib *SourceJD, JITDylibLookupFlags SourceJDLookupFlags, + SymbolAliasMap Aliases, VModuleKey K) : MaterializationUnit(extractFlags(Aliases), std::move(K)), - SourceJD(SourceJD), MatchNonExported(MatchNonExported), + SourceJD(SourceJD), SourceJDLookupFlags(SourceJDLookupFlags), Aliases(std::move(Aliases)) {} StringRef ReExportsMaterializationUnit::getName() 
const { @@ -551,7 +615,7 @@ void ReExportsMaterializationUnit::materialize( if (!Aliases.empty()) { if (SourceJD) - R.replace(reexports(*SourceJD, std::move(Aliases), MatchNonExported)); + R.replace(reexports(*SourceJD, std::move(Aliases), SourceJDLookupFlags)); else R.replace(symbolAliases(std::move(Aliases))); } @@ -572,11 +636,11 @@ void ReExportsMaterializationUnit::materialize( // be waitin on a symbol that it itself had to resolve. Usually this will just // involve one round and a single query. - std::vector>> + std::vector>> QueryInfos; while (!RequestedAliases.empty()) { SymbolNameSet ResponsibilitySymbols; - SymbolNameSet QuerySymbols; + SymbolLookupSet QuerySymbols; SymbolAliasMap QueryAliases; // Collect as many aliases as we can without including a chain. @@ -587,7 +651,7 @@ void ReExportsMaterializationUnit::materialize( continue; ResponsibilitySymbols.insert(KV.first); - QuerySymbols.insert(KV.second.Aliasee); + QuerySymbols.add(KV.second.Aliasee); QueryAliases[KV.first] = std::move(KV.second); } @@ -657,8 +721,9 @@ void ReExportsMaterializationUnit::materialize( } }; - ES.lookup(JITDylibSearchList({{&SrcJD, MatchNonExported}}), QuerySymbols, - SymbolState::Resolved, std::move(OnComplete), + ES.lookup(LookupKind::Static, + JITDylibSearchOrder({{&SrcJD, SourceJDLookupFlags}}), + QuerySymbols, SymbolState::Resolved, std::move(OnComplete), std::move(RegisterDependencies)); } } @@ -681,16 +746,16 @@ ReExportsMaterializationUnit::extractFlags(const SymbolAliasMap &Aliases) { Expected buildSimpleReexportsAliasMap(JITDylib &SourceJD, const SymbolNameSet &Symbols) { - auto Flags = SourceJD.lookupFlags(Symbols); + SymbolLookupSet LookupSet(Symbols); + auto Flags = SourceJD.lookupFlags( + LookupKind::Static, JITDylibLookupFlags::MatchAllSymbols, LookupSet); if (!Flags) return Flags.takeError(); - if (Flags->size() != Symbols.size()) { - SymbolNameSet Unresolved = Symbols; - for (auto &KV : *Flags) - Unresolved.erase(KV.first); - return 
make_error(std::move(Unresolved)); + if (!LookupSet.empty()) { + LookupSet.sortByName(); + return make_error(LookupSet.getSymbolNames()); } SymbolAliasMap Result; @@ -703,32 +768,32 @@ buildSimpleReexportsAliasMap(JITDylib &SourceJD, const SymbolNameSet &Symbols) { } ReexportsGenerator::ReexportsGenerator(JITDylib &SourceJD, - bool MatchNonExported, + JITDylibLookupFlags SourceJDLookupFlags, SymbolPredicate Allow) - : SourceJD(SourceJD), MatchNonExported(MatchNonExported), + : SourceJD(SourceJD), SourceJDLookupFlags(SourceJDLookupFlags), Allow(std::move(Allow)) {} -Expected -ReexportsGenerator::tryToGenerate(JITDylib &JD, const SymbolNameSet &Names) { - orc::SymbolNameSet Added; - orc::SymbolAliasMap AliasMap; - - auto Flags = SourceJD.lookupFlags(Names); +Error ReexportsGenerator::tryToGenerate(LookupKind K, JITDylib &JD, + JITDylibLookupFlags JDLookupFlags, + const SymbolLookupSet &LookupSet) { + assert(&JD != &SourceJD && "Cannot re-export from the same dylib"); + // Use lookupFlags to find the subset of symbols that match our lookup. + auto Flags = SourceJD.lookupFlags(K, JDLookupFlags, LookupSet); if (!Flags) return Flags.takeError(); - for (auto &KV : *Flags) { - if (Allow && !Allow(KV.first)) - continue; - AliasMap[KV.first] = SymbolAliasMapEntry(KV.first, KV.second); - Added.insert(KV.first); - } + // Create an alias map. + orc::SymbolAliasMap AliasMap; + for (auto &KV : *Flags) + if (!Allow || Allow(KV.first)) + AliasMap[KV.first] = SymbolAliasMapEntry(KV.first, KV.second); - if (!Added.empty()) - cantFail(JD.define(reexports(SourceJD, AliasMap, MatchNonExported))); + if (AliasMap.empty()) + return Error::success(); - return Added; + // Define the re-exports. 
+ return JD.define(reexports(SourceJD, AliasMap, SourceJDLookupFlags)); } JITDylib::DefinitionGenerator::~DefinitionGenerator() {} @@ -1252,41 +1317,41 @@ void JITDylib::notifyFailed(FailedSymbolsWorklist Worklist) { Q->handleFailed(make_error(FailedSymbolsMap)); } -void JITDylib::setSearchOrder(JITDylibSearchList NewSearchOrder, - bool SearchThisJITDylibFirst, - bool MatchNonExportedInThisDylib) { - if (SearchThisJITDylibFirst) { - if (NewSearchOrder.empty() || NewSearchOrder.front().first != this) - NewSearchOrder.insert(NewSearchOrder.begin(), - {this, MatchNonExportedInThisDylib}); - } - - ES.runSessionLocked([&]() { SearchOrder = std::move(NewSearchOrder); }); -} - -void JITDylib::addToSearchOrder(JITDylib &JD, bool MatchNonExported) { +void JITDylib::setSearchOrder(JITDylibSearchOrder NewSearchOrder, + bool SearchThisJITDylibFirst) { ES.runSessionLocked([&]() { - SearchOrder.push_back({&JD, MatchNonExported}); + if (SearchThisJITDylibFirst) { + SearchOrder.clear(); + if (NewSearchOrder.empty() || NewSearchOrder.front().first != this) + SearchOrder.push_back( + std::make_pair(this, JITDylibLookupFlags::MatchAllSymbols)); + SearchOrder.insert(SearchOrder.end(), NewSearchOrder.begin(), + NewSearchOrder.end()); + } else + SearchOrder = std::move(NewSearchOrder); }); } +void JITDylib::addToSearchOrder(JITDylib &JD, + JITDylibLookupFlags JDLookupFlags) { + ES.runSessionLocked([&]() { SearchOrder.push_back({&JD, JDLookupFlags}); }); +} + void JITDylib::replaceInSearchOrder(JITDylib &OldJD, JITDylib &NewJD, - bool MatchNonExported) { + JITDylibLookupFlags JDLookupFlags) { ES.runSessionLocked([&]() { - auto I = std::find_if(SearchOrder.begin(), SearchOrder.end(), - [&](const JITDylibSearchList::value_type &KV) { - return KV.first == &OldJD; - }); - - if (I != SearchOrder.end()) - *I = {&NewJD, MatchNonExported}; + for (auto &KV : SearchOrder) + if (KV.first == &OldJD) { + KV = {&NewJD, JDLookupFlags}; + break; + } }); } void JITDylib::removeFromSearchOrder(JITDylib 
&JD) { ES.runSessionLocked([&]() { auto I = std::find_if(SearchOrder.begin(), SearchOrder.end(), - [&](const JITDylibSearchList::value_type &KV) { + [&](const JITDylibSearchOrder::value_type &KV) { return KV.first == &JD; }); if (I != SearchOrder.end()) @@ -1349,63 +1414,54 @@ Error JITDylib::remove(const SymbolNameSet &Names) { }); } -Expected JITDylib::lookupFlags(const SymbolNameSet &Names) { +Expected +JITDylib::lookupFlags(LookupKind K, JITDylibLookupFlags JDLookupFlags, + SymbolLookupSet LookupSet) { return ES.runSessionLocked([&, this]() -> Expected { SymbolFlagsMap Result; - auto Unresolved = lookupFlagsImpl(Result, Names); - if (!Unresolved) - return Unresolved.takeError(); + lookupFlagsImpl(Result, K, JDLookupFlags, LookupSet); - /// Run any definition generators. + // Run any definition generators. for (auto &DG : DefGenerators) { - // Bail out early if we've resolved everything. - if (Unresolved->empty()) + // Bail out early if we found everything. + if (LookupSet.empty()) break; // Run this generator. - auto NewDefs = DG->tryToGenerate(*this, *Unresolved); - if (!NewDefs) - return NewDefs.takeError(); - - if (!NewDefs->empty()) { - auto Unresolved2 = lookupFlagsImpl(Result, *NewDefs); - if (!Unresolved2) - return Unresolved2.takeError(); - (void)Unresolved2; - assert(Unresolved2->empty() && - "All fallback defs should have been found by lookupFlagsImpl"); - } + if (auto Err = DG->tryToGenerate(K, *this, JDLookupFlags, LookupSet)) + return std::move(Err); - for (auto &Name : *NewDefs) - Unresolved->erase(Name); + // Re-try the search. 
+ lookupFlagsImpl(Result, K, JDLookupFlags, LookupSet); } + return Result; }); } -Expected JITDylib::lookupFlagsImpl(SymbolFlagsMap &Flags, - const SymbolNameSet &Names) { - SymbolNameSet Unresolved; +void JITDylib::lookupFlagsImpl(SymbolFlagsMap &Result, LookupKind K, + JITDylibLookupFlags JDLookupFlags, + SymbolLookupSet &LookupSet) { - for (auto &Name : Names) { - auto I = Symbols.find(Name); - if (I != Symbols.end()) { - assert(!Flags.count(Name) && "Symbol already present in Flags map"); - Flags[Name] = I->second.getFlags(); - } else - Unresolved.insert(Name); - } - - return Unresolved; + LookupSet.forEachWithRemoval( + [&](const SymbolStringPtr &Name, SymbolLookupFlags Flags) -> bool { + auto I = Symbols.find(Name); + if (I == Symbols.end()) + return false; + assert(!Result.count(Name) && "Symbol already present in Flags map"); + Result[Name] = I->second.getFlags(); + return true; + }); } -Error JITDylib::lodgeQuery(std::shared_ptr &Q, - SymbolNameSet &Unresolved, bool MatchNonExported, - MaterializationUnitList &MUs) { +Error JITDylib::lodgeQuery(MaterializationUnitList &MUs, + std::shared_ptr &Q, + LookupKind K, JITDylibLookupFlags JDLookupFlags, + SymbolLookupSet &Unresolved) { assert(Q && "Query can not be null"); - if (auto Err = lodgeQueryImpl(Q, Unresolved, MatchNonExported, MUs)) + if (auto Err = lodgeQueryImpl(MUs, Q, K, JDLookupFlags, Unresolved)) return Err; // Run any definition generators. @@ -1416,104 +1472,86 @@ Error JITDylib::lodgeQuery(std::shared_ptr &Q, break; // Run the generator. - auto NewDefs = DG->tryToGenerate(*this, Unresolved); - - // If the generator returns an error then bail out. - if (!NewDefs) - return NewDefs.takeError(); - - // If the generator was able to generate new definitions for any of the - // unresolved symbols then lodge the query against them. - if (!NewDefs->empty()) { - for (auto &D : *NewDefs) - Unresolved.erase(D); - - // Lodge query. 
This can not fail as any new definitions were added - // by the generator under the session locked. Since they can't have - // started materializing yet the can not have failed. - cantFail(lodgeQueryImpl(Q, *NewDefs, MatchNonExported, MUs)); + if (auto Err = DG->tryToGenerate(K, *this, JDLookupFlags, Unresolved)) + return Err; - assert(NewDefs->empty() && - "All fallback defs should have been found by lookupImpl"); - } + // Lodge query. This can not fail as any new definitions were added + // by the generator under the session locked. Since they can't have + // started materializing yet they can not have failed. + cantFail(lodgeQueryImpl(MUs, Q, K, JDLookupFlags, Unresolved)); } return Error::success(); } -Error JITDylib::lodgeQueryImpl( - std::shared_ptr &Q, SymbolNameSet &Unresolved, - bool MatchNonExported, - std::vector> &MUs) { - - std::vector ToRemove; - for (auto Name : Unresolved) { - - // Search for the name in Symbols. Skip it if not found. - auto SymI = Symbols.find(Name); - if (SymI == Symbols.end()) - continue; - - // If this is a non exported symbol and we're skipping those then skip it. - if (!SymI->second.getFlags().isExported() && !MatchNonExported) - continue; - - // If we matched against Name in JD, mark it to be removed from the - // Unresolved set. - ToRemove.push_back(Name); - - // If we matched against this symbol but it is in the error state then - // bail out and treat it as a failure to materialize. - if (SymI->second.getFlags().hasError()) { - auto FailedSymbolsMap = std::make_shared(); - (*FailedSymbolsMap)[this] = {Name}; - return make_error(std::move(FailedSymbolsMap)); - } - - // If this symbol already meets the required state for then notify the - // query and continue. - if (SymI->second.getState() >= Q->getRequiredState()) { - Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol()); - continue; - } - - // Otherwise this symbol does not yet meet the required state. 
Check whether - // it has a materializer attached, and if so prepare to run it. - if (SymI->second.hasMaterializerAttached()) { - assert(SymI->second.getAddress() == 0 && - "Symbol not resolved but already has address?"); - auto UMII = UnmaterializedInfos.find(Name); - assert(UMII != UnmaterializedInfos.end() && - "Lazy symbol should have UnmaterializedInfo"); - auto MU = std::move(UMII->second->MU); - assert(MU != nullptr && "Materializer should not be null"); - - // Move all symbols associated with this MaterializationUnit into - // materializing state. - for (auto &KV : MU->getSymbols()) { - auto SymK = Symbols.find(KV.first); - SymK->second.setMaterializerAttached(false); - SymK->second.setState(SymbolState::Materializing); - UnmaterializedInfos.erase(KV.first); - } +Error JITDylib::lodgeQueryImpl(MaterializationUnitList &MUs, + std::shared_ptr &Q, + LookupKind K, JITDylibLookupFlags JDLookupFlags, + SymbolLookupSet &Unresolved) { + + return Unresolved.forEachWithRemoval( + [&](const SymbolStringPtr &Name, + SymbolLookupFlags SymLookupFlags) -> Expected { + // Search for name in symbols. If not found then continue without + // removal. + auto SymI = Symbols.find(Name); + if (SymI == Symbols.end()) + return false; + + // If this is a non exported symbol and we're matching exported symbols + // only then skip this symbol without removal. + if (!SymI->second.getFlags().isExported() && + JDLookupFlags == JITDylibLookupFlags::MatchExportedSymbolsOnly) + return false; + + // If we matched against this symbol but it is in the error state then + // bail out and treat it as a failure to materialize. + if (SymI->second.getFlags().hasError()) { + auto FailedSymbolsMap = std::make_shared(); + (*FailedSymbolsMap)[this] = {Name}; + return make_error(std::move(FailedSymbolsMap)); + } - // Add MU to the list of MaterializationUnits to be materialized. 
- MUs.push_back(std::move(MU)); - } + // If this symbol already meets the required state for then notify the + // query, then remove the symbol and continue. + if (SymI->second.getState() >= Q->getRequiredState()) { + Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol()); + return true; + } - // Add the query to the PendingQueries list. - assert(SymI->second.isInMaterializationPhase() && - "By this line the symbol should be materializing"); - auto &MI = MaterializingInfos[Name]; - MI.addQuery(Q); - Q->addQueryDependence(*this, Name); - } + // Otherwise this symbol does not yet meet the required state. Check + // whether it has a materializer attached, and if so prepare to run it. + if (SymI->second.hasMaterializerAttached()) { + assert(SymI->second.getAddress() == 0 && + "Symbol not resolved but already has address?"); + auto UMII = UnmaterializedInfos.find(Name); + assert(UMII != UnmaterializedInfos.end() && + "Lazy symbol should have UnmaterializedInfo"); + auto MU = std::move(UMII->second->MU); + assert(MU != nullptr && "Materializer should not be null"); + + // Move all symbols associated with this MaterializationUnit into + // materializing state. + for (auto &KV : MU->getSymbols()) { + auto SymK = Symbols.find(KV.first); + SymK->second.setMaterializerAttached(false); + SymK->second.setState(SymbolState::Materializing); + UnmaterializedInfos.erase(KV.first); + } - // Remove any symbols that we found. - for (auto &Name : ToRemove) - Unresolved.erase(Name); + // Add MU to the list of MaterializationUnits to be materialized. + MUs.push_back(std::move(MU)); + } - return Error::success(); + // Add the query to the PendingQueries list and continue, deleting the + // element. 
+ assert(SymI->second.isInMaterializationPhase() && + "By this line the symbol should be materializing"); + auto &MI = MaterializingInfos[Name]; + MI.addQuery(Q); + Q->addQueryDependence(*this, Name); + return true; + }); } Expected @@ -1526,7 +1564,7 @@ JITDylib::legacyLookup(std::shared_ptr Q, bool QueryComplete = false; std::vector> MUs; - SymbolNameSet Unresolved = std::move(Names); + SymbolLookupSet Unresolved(Names); auto Err = ES.runSessionLocked([&, this]() -> Error { QueryComplete = lookupImpl(Q, MUs, Unresolved); @@ -1538,16 +1576,13 @@ JITDylib::legacyLookup(std::shared_ptr Q, break; assert(!QueryComplete && "query complete but unresolved symbols remain?"); - auto NewDefs = DG->tryToGenerate(*this, Unresolved); - if (!NewDefs) - return NewDefs.takeError(); - if (!NewDefs->empty()) { - for (auto &D : *NewDefs) - Unresolved.erase(D); - QueryComplete = lookupImpl(Q, MUs, *NewDefs); - assert(NewDefs->empty() && - "All fallback defs should have been found by lookupImpl"); - } + if (auto Err = DG->tryToGenerate(LookupKind::Static, *this, + JITDylibLookupFlags::MatchAllSymbols, + Unresolved)) + return Err; + + if (!Unresolved.empty()) + QueryComplete = lookupImpl(Q, MUs, Unresolved); } return Error::success(); }); @@ -1575,68 +1610,68 @@ JITDylib::legacyLookup(std::shared_ptr Q, // for (auto &MU : MUs) // ES.dispatchMaterialization(*this, std::move(MU)); - return Unresolved; + SymbolNameSet RemainingSymbols; + for (auto &KV : Unresolved) + RemainingSymbols.insert(KV.first); + + return RemainingSymbols; } bool JITDylib::lookupImpl( std::shared_ptr &Q, std::vector> &MUs, - SymbolNameSet &Unresolved) { + SymbolLookupSet &Unresolved) { bool QueryComplete = false; std::vector ToRemove; - for (auto Name : Unresolved) { - - // Search for the name in Symbols. Skip it if not found. - auto SymI = Symbols.find(Name); - if (SymI == Symbols.end()) - continue; - - // If we found Name, mark it to be removed from the Unresolved set. 
- ToRemove.push_back(Name); - - if (SymI->second.getState() >= Q->getRequiredState()) { - Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol()); - if (Q->isComplete()) - QueryComplete = true; - continue; - } - - // If the symbol is lazy, get the MaterialiaztionUnit for it. - if (SymI->second.hasMaterializerAttached()) { - assert(SymI->second.getAddress() == 0 && - "Lazy symbol should not have a resolved address"); - auto UMII = UnmaterializedInfos.find(Name); - assert(UMII != UnmaterializedInfos.end() && - "Lazy symbol should have UnmaterializedInfo"); - auto MU = std::move(UMII->second->MU); - assert(MU != nullptr && "Materializer should not be null"); - - // Kick all symbols associated with this MaterializationUnit into - // materializing state. - for (auto &KV : MU->getSymbols()) { - auto SymK = Symbols.find(KV.first); - assert(SymK != Symbols.end() && "Missing symbol table entry"); - SymK->second.setState(SymbolState::Materializing); - SymK->second.setMaterializerAttached(false); - UnmaterializedInfos.erase(KV.first); - } + Unresolved.forEachWithRemoval( + [&](const SymbolStringPtr &Name, SymbolLookupFlags Flags) -> bool { + // Search for the name in Symbols. Skip without removing if not found. + auto SymI = Symbols.find(Name); + if (SymI == Symbols.end()) + return false; + + // If the symbol is already in the required state then notify the query + // and remove. + if (SymI->second.getState() >= Q->getRequiredState()) { + Q->notifySymbolMetRequiredState(Name, SymI->second.getSymbol()); + if (Q->isComplete()) + QueryComplete = true; + return true; + } - // Add MU to the list of MaterializationUnits to be materialized. - MUs.push_back(std::move(MU)); - } + // If the symbol is lazy, get the MaterialiaztionUnit for it. 
+ if (SymI->second.hasMaterializerAttached()) { + assert(SymI->second.getAddress() == 0 && + "Lazy symbol should not have a resolved address"); + auto UMII = UnmaterializedInfos.find(Name); + assert(UMII != UnmaterializedInfos.end() && + "Lazy symbol should have UnmaterializedInfo"); + auto MU = std::move(UMII->second->MU); + assert(MU != nullptr && "Materializer should not be null"); + + // Kick all symbols associated with this MaterializationUnit into + // materializing state. + for (auto &KV : MU->getSymbols()) { + auto SymK = Symbols.find(KV.first); + assert(SymK != Symbols.end() && "Missing symbol table entry"); + SymK->second.setState(SymbolState::Materializing); + SymK->second.setMaterializerAttached(false); + UnmaterializedInfos.erase(KV.first); + } - // Add the query to the PendingQueries list. - assert(SymI->second.isInMaterializationPhase() && - "By this line the symbol should be materializing"); - auto &MI = MaterializingInfos[Name]; - MI.addQuery(Q); - Q->addQueryDependence(*this, Name); - } + // Add MU to the list of MaterializationUnits to be materialized. + MUs.push_back(std::move(MU)); + } - // Remove any marked symbols from the Unresolved set. - for (auto &Name : ToRemove) - Unresolved.erase(Name); + // Add the query to the PendingQueries list. + assert(SymI->second.isInMaterializationPhase() && + "By this line the symbol should be materializing"); + auto &MI = MaterializingInfos[Name]; + MI.addQuery(Q); + Q->addQueryDependence(*this, Name); + return true; + }); return QueryComplete; } @@ -1645,11 +1680,7 @@ void JITDylib::dump(raw_ostream &OS) { ES.runSessionLocked([&, this]() { OS << "JITDylib \"" << JITDylibName << "\" (ES: " << format("0x%016" PRIx64, reinterpret_cast(&ES)) << "):\n" - << "Search order: ["; - for (auto &KV : SearchOrder) - OS << " (\"" << KV.first->getName() << "\", " - << (KV.second ? 
"all" : "exported only") << ")"; - OS << " ]\n" + << "Search order: " << SearchOrder << "\n" << "Symbol table:\n"; for (auto &KV : Symbols) { @@ -1730,7 +1761,7 @@ JITDylib::MaterializingInfo::takeQueriesMeeting(SymbolState RequiredState) { JITDylib::JITDylib(ExecutionSession &ES, std::string Name) : ES(ES), JITDylibName(std::move(Name)) { - SearchOrder.push_back({this, true}); + SearchOrder.push_back({this, JITDylibLookupFlags::MatchAllSymbols}); } Error JITDylib::defineImpl(MaterializationUnit &MU) { @@ -1898,7 +1929,7 @@ Expected ExecutionSession::legacyLookup( #endif auto Query = std::make_shared( - Names, RequiredState, std::move(NotifyComplete)); + SymbolLookupSet(Names), RequiredState, std::move(NotifyComplete)); // FIXME: This should be run session locked along with the registration code // and error reporting below. SymbolNameSet UnresolvedSymbols = AsyncLookup(Query, std::move(Names)); @@ -1935,8 +1966,9 @@ Expected ExecutionSession::legacyLookup( } void ExecutionSession::lookup( - const JITDylibSearchList &SearchOrder, SymbolNameSet Symbols, - SymbolState RequiredState, SymbolsResolvedCallback NotifyComplete, + LookupKind K, const JITDylibSearchOrder &SearchOrder, + SymbolLookupSet Symbols, SymbolState RequiredState, + SymbolsResolvedCallback NotifyComplete, RegisterDependenciesFunction RegisterDependencies) { LLVM_DEBUG({ @@ -1965,14 +1997,24 @@ void ExecutionSession::lookup( "JITDylibList should not contain duplicate entries"); auto &JD = *KV.first; - auto MatchNonExported = KV.second; - if (auto Err = JD.lodgeQuery(Q, Unresolved, MatchNonExported, - CollectedMUsMap[&JD])) + auto JDLookupFlags = KV.second; + if (auto Err = JD.lodgeQuery(CollectedMUsMap[&JD], Q, K, JDLookupFlags, + Unresolved)) return Err; } + // Strip any weakly referenced symbols that were not found. 
+ Unresolved.forEachWithRemoval( + [&](const SymbolStringPtr &Name, SymbolLookupFlags Flags) { + if (Flags == SymbolLookupFlags::WeaklyReferencedSymbol) { + Q->dropSymbol(Name); + return true; + } + return false; + }); + if (!Unresolved.empty()) - return make_error(std::move(Unresolved)); + return make_error(Unresolved.getSymbolNames()); return Error::success(); }; @@ -2026,8 +2068,8 @@ void ExecutionSession::lookup( } Expected -ExecutionSession::lookup(const JITDylibSearchList &SearchOrder, - const SymbolNameSet &Symbols, +ExecutionSession::lookup(const JITDylibSearchOrder &SearchOrder, + const SymbolLookupSet &Symbols, LookupKind K, SymbolState RequiredState, RegisterDependenciesFunction RegisterDependencies) { #if LLVM_ENABLE_THREADS @@ -2059,7 +2101,7 @@ ExecutionSession::lookup(const JITDylibSearchList &SearchOrder, #endif // Perform the asynchronous lookup. - lookup(SearchOrder, Symbols, RequiredState, NotifyComplete, + lookup(K, SearchOrder, Symbols, RequiredState, NotifyComplete, RegisterDependencies); #if LLVM_ENABLE_THREADS @@ -2080,12 +2122,12 @@ ExecutionSession::lookup(const JITDylibSearchList &SearchOrder, } Expected -ExecutionSession::lookup(const JITDylibSearchList &SearchOrder, +ExecutionSession::lookup(const JITDylibSearchOrder &SearchOrder, SymbolStringPtr Name) { - SymbolNameSet Names({Name}); + SymbolLookupSet Names({Name}); - if (auto ResultMap = lookup(SearchOrder, std::move(Names), SymbolState::Ready, - NoDependenciesToRegister)) { + if (auto ResultMap = lookup(SearchOrder, std::move(Names), LookupKind::Static, + SymbolState::Ready, NoDependenciesToRegister)) { assert(ResultMap->size() == 1 && "Unexpected number of results"); assert(ResultMap->count(Name) && "Missing result for symbol"); return std::move(ResultMap->begin()->second); @@ -2096,14 +2138,7 @@ ExecutionSession::lookup(const JITDylibSearchList &SearchOrder, Expected ExecutionSession::lookup(ArrayRef SearchOrder, SymbolStringPtr Name) { - SymbolNameSet Names({Name}); - - 
JITDylibSearchList FullSearchOrder; - FullSearchOrder.reserve(SearchOrder.size()); - for (auto *JD : SearchOrder) - FullSearchOrder.push_back({JD, false}); - - return lookup(FullSearchOrder, Name); + return lookup(makeJITDylibSearchOrder(SearchOrder), Name); } Expected diff --git a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp index 4a886ac0597c1..4a3482242dbc7 100644 --- a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp @@ -118,19 +118,17 @@ void CtorDtorRunner::add(iterator_range CtorDtors) { Error CtorDtorRunner::run() { using CtorDtorTy = void (*)(); - SymbolNameSet Names; - - for (auto &KV : CtorDtorsByPriority) { - for (auto &Name : KV.second) { - auto Added = Names.insert(Name).second; - (void)Added; - assert(Added && "Ctor/Dtor names clashed"); - } - } + SymbolLookupSet LookupSet; + for (auto &KV : CtorDtorsByPriority) + for (auto &Name : KV.second) + LookupSet.add(Name); + assert(!LookupSet.containsDuplicates() && + "Ctor/Dtor list contains duplicates"); auto &ES = JD.getExecutionSession(); - if (auto CtorDtorMap = - ES.lookup(JITDylibSearchList({{&JD, true}}), std::move(Names))) { + if (auto CtorDtorMap = ES.lookup( + makeJITDylibSearchOrder(&JD, JITDylibLookupFlags::MatchAllSymbols), + std::move(LookupSet))) { for (auto &KV : CtorDtorsByPriority) { for (auto &Name : KV.second) { assert(CtorDtorMap->count(Name) && "No entry for Name"); @@ -190,15 +188,16 @@ DynamicLibrarySearchGenerator::Load(const char *FileName, char GlobalPrefix, std::move(Lib), GlobalPrefix, std::move(Allow)); } -Expected -DynamicLibrarySearchGenerator::tryToGenerate(JITDylib &JD, - const SymbolNameSet &Names) { - orc::SymbolNameSet Added; +Error DynamicLibrarySearchGenerator::tryToGenerate( + LookupKind K, JITDylib &JD, JITDylibLookupFlags JDLookupFlags, + const SymbolLookupSet &Symbols) { orc::SymbolMap NewSymbols; bool HasGlobalPrefix = (GlobalPrefix != '\0'); - for (auto &Name 
: Names) { + for (auto &KV : Symbols) { + auto &Name = KV.first; + if ((*Name).empty()) continue; @@ -211,20 +210,16 @@ DynamicLibrarySearchGenerator::tryToGenerate(JITDylib &JD, std::string Tmp((*Name).data() + HasGlobalPrefix, (*Name).size() - HasGlobalPrefix); if (void *Addr = Dylib.getAddressOfSymbol(Tmp.c_str())) { - Added.insert(Name); NewSymbols[Name] = JITEvaluatedSymbol( static_cast(reinterpret_cast(Addr)), JITSymbolFlags::Exported); } } - // Add any new symbols to JD. Since the generator is only called for symbols - // that are not already defined, this will never trigger a duplicate - // definition error, so we can wrap this call in a 'cantFail'. - if (!NewSymbols.empty()) - cantFail(JD.define(absoluteSymbols(std::move(NewSymbols)))); + if (NewSymbols.empty()) + return Error::success(); - return Added; + return JD.define(absoluteSymbols(std::move(NewSymbols))); } Expected> @@ -251,15 +246,24 @@ StaticLibraryDefinitionGenerator::Create( return std::move(ADG); } -Expected -StaticLibraryDefinitionGenerator::tryToGenerate(JITDylib &JD, - const SymbolNameSet &Names) { +Error StaticLibraryDefinitionGenerator::tryToGenerate( + LookupKind K, JITDylib &JD, JITDylibLookupFlags JDLookupFlags, + const SymbolLookupSet &Symbols) { + + // Don't materialize symbols from static archives unless this is a static + // lookup. + if (K != LookupKind::Static) + return Error::success(); + + // Bail out early if we've already freed the archive. 
+ if (!Archive) + return Error::success(); DenseSet> ChildBufferInfos; - SymbolNameSet NewDefs; - for (const auto &Name : Names) { - auto Child = Archive.findSym(*Name); + for (const auto &KV : Symbols) { + const auto &Name = KV.first; + auto Child = Archive->findSym(*Name); if (!Child) return Child.takeError(); if (*Child == None) @@ -269,7 +273,6 @@ StaticLibraryDefinitionGenerator::tryToGenerate(JITDylib &JD, return ChildBuffer.takeError(); ChildBufferInfos.insert( {ChildBuffer->getBuffer(), ChildBuffer->getBufferIdentifier()}); - NewDefs.insert(Name); } for (auto ChildBufferInfo : ChildBufferInfos) { @@ -278,31 +281,16 @@ StaticLibraryDefinitionGenerator::tryToGenerate(JITDylib &JD, if (auto Err = L.add(JD, MemoryBuffer::getMemBuffer(ChildBufferRef), VModuleKey())) - return std::move(Err); - - --UnrealizedObjects; + return Err; } - return NewDefs; + return Error::success(); } StaticLibraryDefinitionGenerator::StaticLibraryDefinitionGenerator( ObjectLayer &L, std::unique_ptr ArchiveBuffer, Error &Err) : L(L), ArchiveBuffer(std::move(ArchiveBuffer)), - Archive(*this->ArchiveBuffer, Err) { - - if (Err) - return; - - Error Err2 = Error::success(); - for (auto _ : Archive.children(Err2)) { - (void)_; - ++UnrealizedObjects; - } - - // No need to check this: We will leave it to the caller. - Err = std::move(Err2); -} + Archive(std::make_unique(*this->ArchiveBuffer, Err)) {} } // End namespace orc. } // End namespace llvm. 
diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index 0295db7633dd0..440935ffe9fb9 100644 --- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -101,7 +101,10 @@ JITTargetAddress JITCompileCallbackManager::executeCompileCallback( Name = I->second; } - if (auto Sym = ES.lookup(JITDylibSearchList({{&CallbacksJD, true}}), Name)) + if (auto Sym = + ES.lookup(makeJITDylibSearchOrder( + &CallbacksJD, JITDylibLookupFlags::MatchAllSymbols), + Name)) return Sym->getAddress(); else { llvm::dbgs() << "Didn't find callback.\n"; diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp index 03f22e0c2a2a9..fb5515ca64d1b 100644 --- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp @@ -56,7 +56,9 @@ Error LLJIT::addObjectFile(JITDylib &JD, std::unique_ptr Obj) { Expected LLJIT::lookupLinkerMangled(JITDylib &JD, StringRef Name) { - return ES->lookup(JITDylibSearchList({{&JD, true}}), ES->intern(Name)); + return ES->lookup( + makeJITDylibSearchOrder(&JD, JITDylibLookupFlags::MatchAllSymbols), + ES->intern(Name)); } std::unique_ptr diff --git a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp index 93aabd817d601..aab490feb8ea2 100644 --- a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp @@ -50,8 +50,10 @@ LazyCallThroughManager::callThroughToSymbol(JITTargetAddress TrampolineAddr) { SourceJD = I->second.first; SymbolName = I->second.second; } - auto LookupResult = - ES.lookup(JITDylibSearchList({{SourceJD, true}}), SymbolName); + + auto LookupResult = ES.lookup( + makeJITDylibSearchOrder(SourceJD, JITDylibLookupFlags::MatchAllSymbols), + SymbolName); if (!LookupResult) { ES.reportError(LookupResult.takeError()); diff --git a/llvm/lib/ExecutionEngine/Orc/Legacy.cpp 
b/llvm/lib/ExecutionEngine/Orc/Legacy.cpp index 9f9a6730b2c30..67b804c37287d 100644 --- a/llvm/lib/ExecutionEngine/Orc/Legacy.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Legacy.cpp @@ -37,7 +37,8 @@ void JITSymbolResolverAdapter::lookup(const LookupSet &Symbols, }; auto Q = std::make_shared( - InternedSymbols, SymbolState::Resolved, std::move(OnResolvedWithUnwrap)); + SymbolLookupSet(InternedSymbols), SymbolState::Resolved, + std::move(OnResolvedWithUnwrap)); auto Unresolved = R.lookup(Q, InternedSymbols); if (Unresolved.empty()) { diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp index 874decb2ade0b..be0ce4a1d75a0 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp @@ -47,18 +47,28 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { MR.failMaterialization(); } - void lookup(const DenseSet &Symbols, + void lookup(const LookupMap &Symbols, std::unique_ptr LC) override { - JITDylibSearchList SearchOrder; + JITDylibSearchOrder SearchOrder; MR.getTargetJITDylib().withSearchOrderDo( - [&](const JITDylibSearchList &JDs) { SearchOrder = JDs; }); + [&](const JITDylibSearchOrder &O) { SearchOrder = O; }); auto &ES = Layer.getExecutionSession(); - SymbolNameSet InternedSymbols; - for (auto &S : Symbols) - InternedSymbols.insert(ES.intern(S)); + SymbolLookupSet LookupSet; + for (auto &KV : Symbols) { + orc::SymbolLookupFlags LookupFlags; + switch (KV.second) { + case jitlink::SymbolLookupFlags::RequiredSymbol: + LookupFlags = orc::SymbolLookupFlags::RequiredSymbol; + break; + case jitlink::SymbolLookupFlags::WeaklyReferencedSymbol: + LookupFlags = orc::SymbolLookupFlags::WeaklyReferencedSymbol; + break; + } + LookupSet.add(ES.intern(KV.first), LookupFlags); + } // OnResolve -- De-intern the symbols and pass the result to the linker. 
auto OnResolve = [this, LookupContinuation = std::move(LC)]( @@ -74,8 +84,9 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { } }; - ES.lookup(SearchOrder, std::move(InternedSymbols), SymbolState::Resolved, - std::move(OnResolve), [this](const SymbolDependenceMap &Deps) { + ES.lookup(LookupKind::Static, SearchOrder, std::move(LookupSet), + SymbolState::Resolved, std::move(OnResolve), + [this](const SymbolDependenceMap &Deps) { registerDependencies(Deps); }); } diff --git a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp index 939cd539d1fb0..3344bd4d53f98 100644 --- a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp @@ -19,11 +19,11 @@ class JITDylibSearchOrderResolver : public JITSymbolResolver { void lookup(const LookupSet &Symbols, OnResolvedFunction OnResolved) { auto &ES = MR.getTargetJITDylib().getExecutionSession(); - SymbolNameSet InternedSymbols; + SymbolLookupSet InternedSymbols; // Intern the requested symbols: lookup takes interned strings. for (auto &S : Symbols) - InternedSymbols.insert(ES.intern(S)); + InternedSymbols.add(ES.intern(S)); // Build an OnResolve callback to unwrap the interned strings and pass them // to the OnResolved callback. 
@@ -46,11 +46,12 @@ class JITDylibSearchOrderResolver : public JITSymbolResolver { MR.addDependenciesForAll(Deps); }; - JITDylibSearchList SearchOrder; + JITDylibSearchOrder SearchOrder; MR.getTargetJITDylib().withSearchOrderDo( - [&](const JITDylibSearchList &JDs) { SearchOrder = JDs; }); - ES.lookup(SearchOrder, InternedSymbols, SymbolState::Resolved, - std::move(OnResolvedWithUnwrap), RegisterDependencies); + [&](const JITDylibSearchOrder &JDs) { SearchOrder = JDs; }); + ES.lookup(LookupKind::Static, SearchOrder, InternedSymbols, + SymbolState::Resolved, std::move(OnResolvedWithUnwrap), + RegisterDependencies); } Expected getResponsibilitySet(const LookupSet &Symbols) { diff --git a/llvm/test/ExecutionEngine/JITLink/X86/MachO_weak_references.s b/llvm/test/ExecutionEngine/JITLink/X86/MachO_weak_references.s new file mode 100644 index 0000000000000..20fa5536302d7 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/X86/MachO_weak_references.s @@ -0,0 +1,19 @@ +# RUN: rm -rf %t && mkdir -p %t +# RUN: llvm-mc -triple=x86_64-apple-macosx10.9 -filetype=obj -o %t/macho_weak_refs.o %s +# RUN: llvm-jitlink -noexec -check-name=jitlink-check-bar-present -define-abs bar=0x1 -check=%s %t/macho_weak_refs.o +# RUN: llvm-jitlink -noexec -check-name=jitlink-check-bar-absent -check=%s %t/macho_weak_refs.o + +# Test weak reference handling by linking with and without a definition of 'bar' available. 
+ + .section __TEXT,__text,regular,pure_instructions + .build_version macos, 10, 14 sdk_version 10, 14 + .globl _main + .p2align 4, 0x90 +_main: +# jitlink-check-bar-present: *{8}(got_addr(macho_weak_refs.o, bar)) = bar +# jitlink-check-bar-absent: *{8}(got_addr(macho_weak_refs.o, bar)) = 0 + cmpq $0, bar@GOTPCREL(%rip) + + .weak_reference bar + +.subsections_via_symbols diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp index 251e79cf56d12..003a333d9563d 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -56,6 +56,10 @@ static cl::list CheckFiles("check", cl::desc("File containing verifier checks"), cl::ZeroOrMore); +static cl::opt + CheckName("check-name", cl::desc("Name of checks to match against"), + cl::init("jitlink-check")); + static cl::opt EntryPointName("entry", cl::desc("Symbol to call as main entry point"), cl::init("")); @@ -604,11 +608,12 @@ Error loadObjects(Session &S) { // Set every dylib to link against every other, in command line order. for (auto *JD : S.JDSearchOrder) { - JITDylibSearchList O; + auto LookupFlags = JITDylibLookupFlags::MatchExportedSymbolsOnly; + JITDylibSearchOrder O; for (auto *JD2 : S.JDSearchOrder) { if (JD2 == JD) continue; - O.push_back(std::make_pair(JD2, false)); + O.push_back(std::make_pair(JD2, LookupFlags)); } JD->setSearchOrder(std::move(O)); } @@ -741,10 +746,11 @@ Error runChecks(Session &S) { S.TT.isLittleEndian() ? 
support::little : support::big, Disassembler.get(), InstPrinter.get(), dbgs()); + std::string CheckLineStart = "# " + CheckName + ":"; for (auto &CheckFile : CheckFiles) { auto CheckerFileBuf = ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(CheckFile))); - if (!Checker.checkAllRulesInBuffer("# jitlink-check:", &*CheckerFileBuf)) + if (!Checker.checkAllRulesInBuffer(CheckLineStart, &*CheckerFileBuf)) ExitOnErr(make_error( "Some checks in " + CheckFile + " failed", inconvertibleErrorCode())); } diff --git a/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp b/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp index 3e16a50d07bdf..68a6d2ed2ca2c 100644 --- a/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp @@ -43,8 +43,9 @@ TEST_F(CoreAPIsStandardTest, BasicSuccessfulLookup) { FooMR = std::make_shared(std::move(R)); }))); - ES.lookup(JITDylibSearchList({{&JD, false}}), {Foo}, SymbolState::Ready, - OnCompletion, NoDependenciesToRegister); + ES.lookup(LookupKind::Static, makeJITDylibSearchOrder(&JD), + SymbolLookupSet(Foo), SymbolState::Ready, OnCompletion, + NoDependenciesToRegister); EXPECT_FALSE(OnCompletionRun) << "Should not have been resolved yet"; @@ -67,7 +68,7 @@ TEST_F(CoreAPIsStandardTest, ExecutionSessionFailQuery) { OnCompletionRun = true; }; - AsynchronousSymbolQuery Q(SymbolNameSet({Foo}), SymbolState::Ready, + AsynchronousSymbolQuery Q(SymbolLookupSet(Foo), SymbolState::Ready, OnCompletion); ES.legacyFailQuery(Q, @@ -84,8 +85,8 @@ TEST_F(CoreAPIsStandardTest, EmptyLookup) { OnCompletionRun = true; }; - ES.lookup(JITDylibSearchList({{&JD, false}}), {}, SymbolState::Ready, - OnCompletion, NoDependenciesToRegister); + ES.lookup(LookupKind::Static, makeJITDylibSearchOrder(&JD), SymbolLookupSet(), + SymbolState::Ready, OnCompletion, NoDependenciesToRegister); EXPECT_TRUE(OnCompletionRun) << "OnCompletion was not run for empty query"; } @@ -131,7 +132,8 @@ TEST_F(CoreAPIsStandardTest, 
RemoveSymbolsTest) { bool OnCompletionRun = false; ES.lookup( - JITDylibSearchList({{&JD, false}}), {Foo, Baz}, SymbolState::Ready, + LookupKind::Static, makeJITDylibSearchOrder(&JD), + SymbolLookupSet({Foo, Baz}), SymbolState::Ready, [&](Expected Result) { cantFail(Result.takeError()); OnCompletionRun = true; @@ -179,7 +181,7 @@ TEST_F(CoreAPIsStandardTest, ChainedJITDylibLookup) { bool OnCompletionRun = false; auto Q = std::make_shared( - SymbolNameSet({Foo}), SymbolState::Ready, + SymbolLookupSet({Foo}), SymbolState::Ready, [&](Expected Result) { cantFail(std::move(Result)); OnCompletionRun = true; @@ -200,8 +202,8 @@ TEST_F(CoreAPIsStandardTest, LookupWithHiddenSymbols) { cantFail(JD2.define(absoluteSymbols({{Bar, QuxSym}}))); /// Try a blocking lookup. - auto Result = cantFail( - ES.lookup(JITDylibSearchList({{&JD, false}, {&JD2, false}}), {Foo, Bar})); + auto Result = cantFail(ES.lookup(makeJITDylibSearchOrder({&JD, &JD2}), + SymbolLookupSet({Foo, Bar}))); EXPECT_EQ(Result.size(), 2U) << "Unexpected number of results"; EXPECT_EQ(Result.count(Foo), 1U) << "Missing result for \"Foo\""; @@ -226,9 +228,9 @@ TEST_F(CoreAPIsStandardTest, LookupFlagsTest) { cantFail(JD.define(absoluteSymbols({{Foo, FooSym}}))); cantFail(JD.define(std::move(MU))); - SymbolNameSet Names({Foo, Bar, Baz}); - - auto SymbolFlags = cantFail(JD.lookupFlags(Names)); + auto SymbolFlags = cantFail(JD.lookupFlags( + LookupKind::Static, JITDylibLookupFlags::MatchExportedSymbolsOnly, + SymbolLookupSet({Foo, Bar, Baz}))); EXPECT_EQ(SymbolFlags.size(), 2U) << "Returned symbol flags contains unexpected results"; @@ -245,20 +247,24 @@ TEST_F(CoreAPIsStandardTest, LookupWithGeneratorFailure) { class BadGenerator : public JITDylib::DefinitionGenerator { public: - Expected tryToGenerate(JITDylib &, - const SymbolNameSet &) override { + Error tryToGenerate(LookupKind K, JITDylib &, JITDylibLookupFlags, + const SymbolLookupSet &) override { return make_error("BadGenerator", inconvertibleErrorCode()); } }; 
JD.addGenerator(std::make_unique()); - EXPECT_THAT_ERROR(JD.lookupFlags({Foo}).takeError(), Failed()) + EXPECT_THAT_ERROR( + JD.lookupFlags(LookupKind::Static, + JITDylibLookupFlags::MatchExportedSymbolsOnly, + SymbolLookupSet(Foo)) + .takeError(), + Failed()) << "Generator failure did not propagate through lookupFlags"; EXPECT_THAT_ERROR( - ES.lookup(JITDylibSearchList({{&JD, false}}), SymbolNameSet({Foo})) - .takeError(), + ES.lookup(makeJITDylibSearchOrder(&JD), SymbolLookupSet(Foo)).takeError(), Failed()) << "Generator failure did not propagate through lookup"; } @@ -269,7 +275,8 @@ TEST_F(CoreAPIsStandardTest, TestBasicAliases) { {Qux, {Bar, JITSymbolFlags::Weak}}}))); cantFail(JD.define(absoluteSymbols({{Qux, QuxSym}}))); - auto Result = ES.lookup(JITDylibSearchList({{&JD, false}}), {Baz, Qux}); + auto Result = + ES.lookup(makeJITDylibSearchOrder(&JD), SymbolLookupSet({Baz, Qux})); EXPECT_TRUE(!!Result) << "Unexpected lookup failure"; EXPECT_EQ(Result->count(Baz), 1U) << "No result for \"baz\""; EXPECT_EQ(Result->count(Qux), 1U) << "No result for \"qux\""; @@ -284,7 +291,8 @@ TEST_F(CoreAPIsStandardTest, TestChainedAliases) { cantFail(JD.define(symbolAliases( {{Baz, {Bar, BazSym.getFlags()}}, {Bar, {Foo, BarSym.getFlags()}}}))); - auto Result = ES.lookup(JITDylibSearchList({{&JD, false}}), {Bar, Baz}); + auto Result = + ES.lookup(makeJITDylibSearchOrder(&JD), SymbolLookupSet({Bar, Baz})); EXPECT_TRUE(!!Result) << "Unexpected lookup failure"; EXPECT_EQ(Result->count(Bar), 1U) << "No result for \"bar\""; EXPECT_EQ(Result->count(Baz), 1U) << "No result for \"baz\""; @@ -303,7 +311,7 @@ TEST_F(CoreAPIsStandardTest, TestBasicReExports) { cantFail(JD2.define(reexports(JD, {{Bar, {Foo, BarSym.getFlags()}}}))); - auto Result = cantFail(ES.lookup(JITDylibSearchList({{&JD2, false}}), Bar)); + auto Result = cantFail(ES.lookup(makeJITDylibSearchOrder(&JD2), Bar)); EXPECT_EQ(Result.getAddress(), FooSym.getAddress()) << "Re-export Bar for symbol Foo should match FooSym's 
address"; } @@ -329,7 +337,7 @@ TEST_F(CoreAPIsStandardTest, TestThatReExportsDontUnnecessarilyMaterialize) { cantFail(JD2.define(reexports( JD, {{Baz, {Foo, BazSym.getFlags()}}, {Qux, {Bar, QuxSym.getFlags()}}}))); - auto Result = cantFail(ES.lookup(JITDylibSearchList({{&JD2, false}}), Baz)); + auto Result = cantFail(ES.lookup(makeJITDylibSearchOrder(&JD2), Baz)); EXPECT_EQ(Result.getAddress(), FooSym.getAddress()) << "Re-export Baz for symbol Foo should match FooSym's address"; @@ -344,13 +352,16 @@ TEST_F(CoreAPIsStandardTest, TestReexportsGenerator) { auto Filter = [this](SymbolStringPtr Name) { return Name != Bar; }; - JD.addGenerator(std::make_unique(JD2, false, Filter)); + JD.addGenerator(std::make_unique( + JD2, JITDylibLookupFlags::MatchExportedSymbolsOnly, Filter)); - auto Flags = cantFail(JD.lookupFlags({Foo, Bar, Baz})); + auto Flags = cantFail(JD.lookupFlags( + LookupKind::Static, JITDylibLookupFlags::MatchExportedSymbolsOnly, + SymbolLookupSet({Foo, Bar, Baz}))); EXPECT_EQ(Flags.size(), 1U) << "Unexpected number of results"; EXPECT_EQ(Flags[Foo], FooSym.getFlags()) << "Unexpected flags for Foo"; - auto Result = cantFail(ES.lookup(JITDylibSearchList({{&JD, false}}), Foo)); + auto Result = cantFail(ES.lookup(makeJITDylibSearchOrder(&JD), Foo)); EXPECT_EQ(Result.getAddress(), FooSym.getAddress()) << "Incorrect reexported symbol address"; @@ -370,8 +381,9 @@ TEST_F(CoreAPIsStandardTest, TestTrivialCircularDependency) { FooReady = true; }; - ES.lookup(JITDylibSearchList({{&JD, false}}), {Foo}, SymbolState::Ready, - OnCompletion, NoDependenciesToRegister); + ES.lookup(LookupKind::Static, makeJITDylibSearchOrder(&JD), + SymbolLookupSet({Foo}), SymbolState::Ready, OnCompletion, + NoDependenciesToRegister); FooR->addDependenciesForAll({{&JD, SymbolNameSet({Foo})}}); EXPECT_THAT_ERROR(FooR->notifyResolved({{Foo, FooSym}}), Succeeded()) @@ -430,11 +442,13 @@ TEST_F(CoreAPIsStandardTest, TestCircularDependenceInOneJITDylib) { // Issue lookups for Foo. 
Use NoDependenciesToRegister: We're going to add // the dependencies manually below. - ES.lookup(JITDylibSearchList({{&JD, false}}), {Foo}, SymbolState::Resolved, + ES.lookup(LookupKind::Static, makeJITDylibSearchOrder(&JD), + SymbolLookupSet(Foo), SymbolState::Resolved, std::move(OnFooResolution), NoDependenciesToRegister); - ES.lookup(JITDylibSearchList({{&JD, false}}), {Foo}, SymbolState::Ready, - std::move(OnFooReady), NoDependenciesToRegister); + ES.lookup(LookupKind::Static, makeJITDylibSearchOrder(&JD), + SymbolLookupSet(Foo), SymbolState::Ready, std::move(OnFooReady), + NoDependenciesToRegister); bool BarResolved = false; bool BarReady = false; @@ -448,11 +462,13 @@ TEST_F(CoreAPIsStandardTest, TestCircularDependenceInOneJITDylib) { BarReady = true; }; - ES.lookup(JITDylibSearchList({{&JD, false}}), {Bar}, SymbolState::Resolved, + ES.lookup(LookupKind::Static, makeJITDylibSearchOrder(&JD), + SymbolLookupSet(Bar), SymbolState::Resolved, std::move(OnBarResolution), NoDependenciesToRegister); - ES.lookup(JITDylibSearchList({{&JD, false}}), {Bar}, SymbolState::Ready, - std::move(OnBarReady), NoDependenciesToRegister); + ES.lookup(LookupKind::Static, makeJITDylibSearchOrder(&JD), + SymbolLookupSet(Bar), SymbolState::Ready, std::move(OnBarReady), + NoDependenciesToRegister); bool BazResolved = false; bool BazReady = false; @@ -467,11 +483,13 @@ TEST_F(CoreAPIsStandardTest, TestCircularDependenceInOneJITDylib) { BazReady = true; }; - ES.lookup(JITDylibSearchList({{&JD, false}}), {Baz}, SymbolState::Resolved, + ES.lookup(LookupKind::Static, makeJITDylibSearchOrder(&JD), + SymbolLookupSet(Baz), SymbolState::Resolved, std::move(OnBazResolution), NoDependenciesToRegister); - ES.lookup(JITDylibSearchList({{&JD, false}}), {Baz}, SymbolState::Ready, - std::move(OnBazReady), NoDependenciesToRegister); + ES.lookup(LookupKind::Static, makeJITDylibSearchOrder(&JD), + SymbolLookupSet(Baz), SymbolState::Ready, std::move(OnBazReady), + NoDependenciesToRegister); // Add a 
circular dependency: Foo -> Bar, Bar -> Baz, Baz -> Foo. FooR->addDependenciesForAll({{&JD, SymbolNameSet({Bar})}}); @@ -551,8 +569,9 @@ TEST_F(CoreAPIsStandardTest, FailureInDependency) { OnFooReadyRun = true; }; - ES.lookup(JITDylibSearchList({{&JD, false}}), {Foo}, SymbolState::Ready, - std::move(OnFooReady), NoDependenciesToRegister); + ES.lookup(LookupKind::Static, makeJITDylibSearchOrder(&JD), + SymbolLookupSet(Foo), SymbolState::Ready, std::move(OnFooReady), + NoDependenciesToRegister); bool OnBarReadyRun = false; auto OnBarReady = [&](Expected Result) { @@ -560,8 +579,9 @@ TEST_F(CoreAPIsStandardTest, FailureInDependency) { OnBarReadyRun = true; }; - ES.lookup(JITDylibSearchList({{&JD, false}}), {Bar}, SymbolState::Ready, - std::move(OnBarReady), NoDependenciesToRegister); + ES.lookup(LookupKind::Static, makeJITDylibSearchOrder(&JD), + SymbolLookupSet(Bar), SymbolState::Ready, std::move(OnBarReady), + NoDependenciesToRegister); // Add a dependency by Foo on Bar. FooR->addDependenciesForAll({{&JD, SymbolNameSet({Bar})}}); @@ -614,8 +634,9 @@ TEST_F(CoreAPIsStandardTest, FailureInCircularDependency) { OnFooReadyRun = true; }; - ES.lookup(JITDylibSearchList({{&JD, false}}), {Foo}, SymbolState::Ready, - std::move(OnFooReady), NoDependenciesToRegister); + ES.lookup(LookupKind::Static, makeJITDylibSearchOrder(&JD), + SymbolLookupSet(Foo), SymbolState::Ready, std::move(OnFooReady), + NoDependenciesToRegister); bool OnBarReadyRun = false; auto OnBarReady = [&](Expected Result) { @@ -623,8 +644,9 @@ TEST_F(CoreAPIsStandardTest, FailureInCircularDependency) { OnBarReadyRun = true; }; - ES.lookup(JITDylibSearchList({{&JD, false}}), {Bar}, SymbolState::Ready, - std::move(OnBarReady), NoDependenciesToRegister); + ES.lookup(LookupKind::Static, makeJITDylibSearchOrder(&JD), + SymbolLookupSet(Bar), SymbolState::Ready, std::move(OnBarReady), + NoDependenciesToRegister); // Add a dependency by Foo on Bar and vice-versa. 
FooR->addDependenciesForAll({{&JD, SymbolNameSet({Bar})}}); @@ -678,8 +700,9 @@ TEST_F(CoreAPIsStandardTest, AddDependencyOnFailedSymbol) { OnFooReadyRun = true; }; - ES.lookup(JITDylibSearchList({{&JD, false}}), {Foo}, SymbolState::Ready, - std::move(OnFooReady), NoDependenciesToRegister); + ES.lookup(LookupKind::Static, makeJITDylibSearchOrder(&JD), + SymbolLookupSet(Foo), SymbolState::Ready, std::move(OnFooReady), + NoDependenciesToRegister); bool OnBarReadyRun = false; auto OnBarReady = [&](Expected Result) { @@ -687,8 +710,9 @@ TEST_F(CoreAPIsStandardTest, AddDependencyOnFailedSymbol) { OnBarReadyRun = true; }; - ES.lookup(JITDylibSearchList({{&JD, false}}), {Bar}, SymbolState::Ready, - std::move(OnBarReady), NoDependenciesToRegister); + ES.lookup(LookupKind::Static, makeJITDylibSearchOrder(&JD), + SymbolLookupSet(Bar), SymbolState::Ready, std::move(OnBarReady), + NoDependenciesToRegister); // Fail bar. BarR->failMaterialization(); @@ -742,8 +766,9 @@ TEST_F(CoreAPIsStandardTest, FailAfterMaterialization) { OnFooReadyRun = true; }; - ES.lookup(JITDylibSearchList({{&JD, false}}), {Foo}, SymbolState::Ready, - std::move(OnFooReady), NoDependenciesToRegister); + ES.lookup(LookupKind::Static, makeJITDylibSearchOrder(&JD), + SymbolLookupSet(Foo), SymbolState::Ready, std::move(OnFooReady), + NoDependenciesToRegister); bool OnBarReadyRun = false; auto OnBarReady = [&](Expected Result) { @@ -751,8 +776,9 @@ TEST_F(CoreAPIsStandardTest, FailAfterMaterialization) { OnBarReadyRun = true; }; - ES.lookup(JITDylibSearchList({{&JD, false}}), {Bar}, SymbolState::Ready, - std::move(OnBarReady), NoDependenciesToRegister); + ES.lookup(LookupKind::Static, makeJITDylibSearchOrder(&JD), + SymbolLookupSet(Bar), SymbolState::Ready, std::move(OnBarReady), + NoDependenciesToRegister); // Add a dependency by Foo on Bar and vice-versa. 
FooR->addDependenciesForAll({{&JD, SymbolNameSet({Bar})}}); @@ -854,8 +880,6 @@ TEST_F(CoreAPIsStandardTest, AddAndMaterializeLazySymbol) { cantFail(JD.define(MU)); cantFail(JD.define(absoluteSymbols({{Bar, BarSym}}))); - SymbolNameSet Names({Foo}); - bool OnCompletionRun = false; auto OnCompletion = [&](Expected Result) { @@ -867,8 +891,9 @@ TEST_F(CoreAPIsStandardTest, AddAndMaterializeLazySymbol) { OnCompletionRun = true; }; - ES.lookup(JITDylibSearchList({{&JD, false}}), Names, SymbolState::Ready, - std::move(OnCompletion), NoDependenciesToRegister); + ES.lookup(LookupKind::Static, makeJITDylibSearchOrder(&JD), + SymbolLookupSet(Foo), SymbolState::Ready, std::move(OnCompletion), + NoDependenciesToRegister); EXPECT_TRUE(FooMaterialized) << "Foo was not materialized"; EXPECT_TRUE(BarDiscarded) << "Bar was not discarded"; @@ -910,8 +935,9 @@ TEST_F(CoreAPIsStandardTest, TestBasicWeakSymbolMaterialization) { OnCompletionRun = true; }; - ES.lookup(JITDylibSearchList({{&JD, false}}), {Bar}, SymbolState::Ready, - std::move(OnCompletion), NoDependenciesToRegister); + ES.lookup(LookupKind::Static, makeJITDylibSearchOrder(&JD), + SymbolLookupSet(Bar), SymbolState::Ready, std::move(OnCompletion), + NoDependenciesToRegister); EXPECT_TRUE(OnCompletionRun) << "OnCompletion not run"; EXPECT_TRUE(BarMaterialized) << "Bar was not materialized at all"; @@ -938,13 +964,13 @@ TEST_F(CoreAPIsStandardTest, DefineMaterializingSymbol) { }); cantFail(JD.define(MU)); - cantFail(ES.lookup(JITDylibSearchList({{&JD, false}}), Foo)); + cantFail(ES.lookup(makeJITDylibSearchOrder(&JD), Foo)); // Assert that materialization is complete by now. ExpectNoMoreMaterialization = true; // Look up bar to verify that no further materialization happens. 
- auto BarResult = cantFail(ES.lookup(JITDylibSearchList({{&JD, false}}), Bar)); + auto BarResult = cantFail(ES.lookup(makeJITDylibSearchOrder(&JD), Bar)); EXPECT_EQ(BarResult.getAddress(), BarSym.getAddress()) << "Expected Bar == BarSym"; } @@ -955,19 +981,19 @@ TEST_F(CoreAPIsStandardTest, GeneratorTest) { class TestGenerator : public JITDylib::DefinitionGenerator { public: TestGenerator(SymbolMap Symbols) : Symbols(std::move(Symbols)) {} - Expected tryToGenerate(JITDylib &JD, - const SymbolNameSet &Names) { + Error tryToGenerate(LookupKind K, JITDylib &JD, + JITDylibLookupFlags JDLookupFlags, + const SymbolLookupSet &Names) { SymbolMap NewDefs; - SymbolNameSet NewNames; - for (auto &Name : Names) { - if (Symbols.count(Name)) { + for (const auto &KV : Names) { + const auto &Name = KV.first; + if (Symbols.count(Name)) NewDefs[Name] = Symbols[Name]; - NewNames.insert(Name); - } } + cantFail(JD.define(absoluteSymbols(std::move(NewDefs)))); - return NewNames; + return Error::success(); }; private: @@ -976,8 +1002,8 @@ TEST_F(CoreAPIsStandardTest, GeneratorTest) { JD.addGenerator(std::make_unique(SymbolMap({{Bar, BarSym}}))); - auto Result = - cantFail(ES.lookup(JITDylibSearchList({{&JD, false}}), {Foo, Bar})); + auto Result = cantFail( + ES.lookup(makeJITDylibSearchOrder(&JD), SymbolLookupSet({Foo, Bar}))); EXPECT_EQ(Result.count(Bar), 1U) << "Expected to find fallback def for 'bar'"; EXPECT_EQ(Result[Bar].getAddress(), BarSym.getAddress()) @@ -995,7 +1021,7 @@ TEST_F(CoreAPIsStandardTest, FailResolution) { cantFail(JD.define(MU)); SymbolNameSet Names({Foo, Bar}); - auto Result = ES.lookup(JITDylibSearchList({{&JD, false}}), Names); + auto Result = ES.lookup(makeJITDylibSearchOrder(&JD), SymbolLookupSet(Names)); EXPECT_FALSE(!!Result) << "Expected failure"; if (!Result) { @@ -1028,8 +1054,8 @@ TEST_F(CoreAPIsStandardTest, FailEmissionAfterResolution) { cantFail(R.notifyResolved(SymbolMap({{Foo, FooSym}, {Bar, BarSym}}))); ES.lookup( - JITDylibSearchList({{&JD, 
false}}), SymbolNameSet({Baz}), - SymbolState::Resolved, + LookupKind::Static, makeJITDylibSearchOrder(&JD), + SymbolLookupSet({Baz}), SymbolState::Resolved, [&R](Expected Result) { // Called when "baz" is resolved. We don't actually depend // on or care about baz, but use it to trigger failure of @@ -1046,8 +1072,8 @@ TEST_F(CoreAPIsStandardTest, FailEmissionAfterResolution) { cantFail(JD.define(MU)); - SymbolNameSet Names({Foo, Bar}); - auto Result = ES.lookup(JITDylibSearchList({{&JD, false}}), Names); + auto Result = + ES.lookup(makeJITDylibSearchOrder(&JD), SymbolLookupSet({Foo, Bar})); EXPECT_THAT_EXPECTED(std::move(Result), Failed()) << "Unexpected success while trying to test error propagation"; @@ -1066,8 +1092,8 @@ TEST_F(CoreAPIsStandardTest, FailAfterPartialResolution) { bool QueryHandlerRun = false; ES.lookup( - JITDylibSearchList({{&JD, false}}), SymbolNameSet({Foo, Bar}), - SymbolState::Resolved, + LookupKind::Static, makeJITDylibSearchOrder(&JD), + SymbolLookupSet({Foo, Bar}), SymbolState::Resolved, [&](Expected Result) { EXPECT_THAT_EXPECTED(std::move(Result), Failed()) << "Expected query to fail"; @@ -1087,8 +1113,7 @@ TEST_F(CoreAPIsStandardTest, TestLookupWithUnthreadedMaterialization) { cantFail(JD.define(MU)); - auto FooLookupResult = - cantFail(ES.lookup(JITDylibSearchList({{&JD, false}}), Foo)); + auto FooLookupResult = cantFail(ES.lookup(makeJITDylibSearchOrder(&JD), Foo)); EXPECT_EQ(FooLookupResult.getAddress(), FooSym.getAddress()) << "lookup returned an incorrect address"; @@ -1108,8 +1133,7 @@ TEST_F(CoreAPIsStandardTest, TestLookupWithThreadedMaterialization) { cantFail(JD.define(absoluteSymbols({{Foo, FooSym}}))); - auto FooLookupResult = - cantFail(ES.lookup(JITDylibSearchList({{&JD, false}}), Foo)); + auto FooLookupResult = cantFail(ES.lookup(makeJITDylibSearchOrder(&JD), Foo)); EXPECT_EQ(FooLookupResult.getAddress(), FooSym.getAddress()) << "lookup returned an incorrect address"; @@ -1157,16 +1181,14 @@ TEST_F(CoreAPIsStandardTest, 
TestGetRequestedSymbolsAndReplace) { EXPECT_FALSE(FooMaterialized) << "Foo should not be materialized yet"; EXPECT_FALSE(BarMaterialized) << "Bar should not be materialized yet"; - auto FooSymResult = - cantFail(ES.lookup(JITDylibSearchList({{&JD, false}}), Foo)); + auto FooSymResult = cantFail(ES.lookup(makeJITDylibSearchOrder(&JD), Foo)); EXPECT_EQ(FooSymResult.getAddress(), FooSym.getAddress()) << "Address mismatch for Foo"; EXPECT_TRUE(FooMaterialized) << "Foo should be materialized now"; EXPECT_FALSE(BarMaterialized) << "Bar still should not be materialized"; - auto BarSymResult = - cantFail(ES.lookup(JITDylibSearchList({{&JD, false}}), Bar)); + auto BarSymResult = cantFail(ES.lookup(makeJITDylibSearchOrder(&JD), Bar)); EXPECT_EQ(BarSymResult.getAddress(), BarSym.getAddress()) << "Address mismatch for Bar"; EXPECT_TRUE(BarMaterialized) << "Bar should be materialized now"; @@ -1186,7 +1208,8 @@ TEST_F(CoreAPIsStandardTest, TestMaterializationResponsibilityDelegation) { cantFail(JD.define(MU)); - auto Result = ES.lookup(JITDylibSearchList({{&JD, false}}), {Foo, Bar}); + auto Result = + ES.lookup(makeJITDylibSearchOrder(&JD), SymbolLookupSet({Foo, Bar})); EXPECT_TRUE(!!Result) << "Result should be a success value"; EXPECT_EQ(Result->count(Foo), 1U) << "\"Foo\" entry missing"; @@ -1216,8 +1239,9 @@ TEST_F(CoreAPIsStandardTest, TestMaterializeWeakSymbol) { cantFail(std::move(Result)); }; - ES.lookup(JITDylibSearchList({{&JD, false}}), {Foo}, SymbolState::Ready, - std::move(OnCompletion), NoDependenciesToRegister); + ES.lookup(LookupKind::Static, makeJITDylibSearchOrder(&JD), + SymbolLookupSet({Foo}), SymbolState::Ready, std::move(OnCompletion), + NoDependenciesToRegister); auto MU2 = std::make_unique( SymbolFlagsMap({{Foo, JITSymbolFlags::Exported}}), diff --git a/llvm/unittests/ExecutionEngine/Orc/LegacyAPIInteropTest.cpp b/llvm/unittests/ExecutionEngine/Orc/LegacyAPIInteropTest.cpp index f79d721b812c0..7b6d4b078fb9f 100644 --- 
a/llvm/unittests/ExecutionEngine/Orc/LegacyAPIInteropTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/LegacyAPIInteropTest.cpp @@ -24,7 +24,9 @@ TEST_F(LegacyAPIsStandardTest, TestLambdaSymbolResolver) { auto Resolver = createSymbolResolver( [&](const SymbolNameSet &Symbols) { - auto FlagsMap = cantFail(JD.lookupFlags(Symbols)); + auto FlagsMap = cantFail(JD.lookupFlags( + LookupKind::Static, JITDylibLookupFlags::MatchExportedSymbolsOnly, + SymbolLookupSet(Symbols))); SymbolNameSet Result; for (auto &KV : FlagsMap) if (!KV.second.isStrong()) @@ -57,7 +59,7 @@ TEST_F(LegacyAPIsStandardTest, TestLambdaSymbolResolver) { }; auto Q = std::make_shared( - SymbolNameSet({Foo, Bar}), SymbolState::Resolved, OnCompletion); + SymbolLookupSet({Foo, Bar}), SymbolState::Resolved, OnCompletion); auto Unresolved = Resolver->lookup(std::move(Q), SymbolNameSet({Foo, Bar, Baz})); @@ -111,7 +113,8 @@ TEST_F(LegacyAPIsStandardTest, LegacyLookupHelpersFn) { << "Wrong flags for bar"; }; - AsynchronousSymbolQuery Q({Foo, Bar}, SymbolState::Resolved, OnCompletion); + AsynchronousSymbolQuery Q(SymbolLookupSet({Foo, Bar}), SymbolState::Resolved, + OnCompletion); auto Unresolved = lookupWithLegacyFn(ES, Q, SymbolNameSet({Foo, Bar, Baz}), LegacyLookup); diff --git a/llvm/unittests/ExecutionEngine/Orc/RTDyldObjectLinkingLayerTest.cpp b/llvm/unittests/ExecutionEngine/Orc/RTDyldObjectLinkingLayerTest.cpp index ecb8cf653937f..f1c0da6a9abb6 100644 --- a/llvm/unittests/ExecutionEngine/Orc/RTDyldObjectLinkingLayerTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/RTDyldObjectLinkingLayerTest.cpp @@ -63,8 +63,9 @@ static bool testSetProcessAllSections(std::unique_ptr Obj, ObjLayer.setProcessAllSections(ProcessAllSections); cantFail(ObjLayer.add(JD, std::move(Obj), ES.allocateVModule())); - ES.lookup(JITDylibSearchList({{&JD, false}}), {Foo}, SymbolState::Resolved, - OnResolveDoNothing, NoDependenciesToRegister); + ES.lookup(LookupKind::Static, makeJITDylibSearchOrder(&JD), + SymbolLookupSet(Foo), 
SymbolState::Resolved, OnResolveDoNothing, + NoDependenciesToRegister); return DebugSectionSeen; } @@ -160,7 +161,8 @@ TEST(RTDyldObjectLinkingLayerTest, TestOverrideObjectFlags) { cantFail(CompileLayer.add(JD, std::move(M), ES.allocateVModule())); ES.lookup( - JITDylibSearchList({{&JD, false}}), {Foo}, SymbolState::Resolved, + LookupKind::Static, makeJITDylibSearchOrder(&JD), SymbolLookupSet(Foo), + SymbolState::Resolved, [](Expected R) { cantFail(std::move(R)); }, NoDependenciesToRegister); } @@ -225,7 +227,8 @@ TEST(RTDyldObjectLinkingLayerTest, TestAutoClaimResponsibilityForSymbols) { cantFail(CompileLayer.add(JD, std::move(M), ES.allocateVModule())); ES.lookup( - JITDylibSearchList({{&JD, false}}), {Foo}, SymbolState::Resolved, + LookupKind::Static, makeJITDylibSearchOrder(&JD), SymbolLookupSet(Foo), + SymbolState::Resolved, [](Expected R) { cantFail(std::move(R)); }, NoDependenciesToRegister); } From a7abe6eac061a999e88d651e4857872f5ec52e5d Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 28 Nov 2019 13:34:32 -0800 Subject: [PATCH 193/591] [LegacyPassManager] Simplify PMStack pop --- llvm/lib/IR/LegacyPassManager.cpp | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp index 8fe59912f20ac..f3c13728d0cb8 100644 --- a/llvm/lib/IR/LegacyPassManager.cpp +++ b/llvm/lib/IR/LegacyPassManager.cpp @@ -1776,16 +1776,10 @@ LLVM_DUMP_METHOD void PMStack::dump() const { void ModulePass::assignPassManager(PMStack &PMS, PassManagerType PreferredType) { // Find Module Pass Manager - while (!PMS.empty()) { - PassManagerType TopPMType = PMS.top()->getPassManagerType(); - if (TopPMType == PreferredType) - break; // We found desired pass manager - else if (TopPMType > PMT_ModulePassManager) - PMS.pop(); // Pop children pass managers - else - break; - } - assert(!PMS.empty() && "Unable to find appropriate Pass Manager"); + PassManagerType T; + while ((T = 
PMS.top()->getPassManagerType()) > PMT_ModulePassManager && + T != PreferredType) + PMS.pop(); PMS.top()->add(this); } @@ -1793,21 +1787,15 @@ void ModulePass::assignPassManager(PMStack &PMS, /// in the PM Stack and add self into that manager. void FunctionPass::assignPassManager(PMStack &PMS, PassManagerType PreferredType) { - // Find Function Pass Manager - while (!PMS.empty()) { - if (PMS.top()->getPassManagerType() > PMT_FunctionPassManager) - PMS.pop(); - else - break; - } + while (PMS.top()->getPassManagerType() > PMT_FunctionPassManager) + PMS.pop(); // Create new Function Pass Manager if needed. FPPassManager *FPP; if (PMS.top()->getPassManagerType() == PMT_FunctionPassManager) { FPP = (FPPassManager *)PMS.top(); } else { - assert(!PMS.empty() && "Unable to create Function Pass Manager"); PMDataManager *PMD = PMS.top(); // [1] Create new Function Pass Manager From 1abd4c94d7575e4cd288e0024c1ec79f17b048a9 Mon Sep 17 00:00:00 2001 From: Alexandre Ganea Date: Thu, 28 Nov 2019 15:56:00 -0500 Subject: [PATCH 194/591] [Clang] Bypass distro detection on non-Linux hosts Skip distro detection when we're not running on Linux, or when the target triple is not Linux. This saves a few OS calls for each invocation of clang.exe. 
Differential Revision: https://reviews.llvm.org/D70467 --- clang/include/clang/Driver/Distro.h | 3 +- clang/lib/Driver/Distro.cpp | 22 +++++++- clang/lib/Driver/ToolChains/Clang.cpp | 2 +- clang/lib/Driver/ToolChains/Cuda.cpp | 3 +- clang/lib/Driver/ToolChains/Linux.cpp | 4 +- clang/unittests/Driver/DistroTest.cpp | 80 +++++++++++++++++++++++---- 6 files changed, 95 insertions(+), 19 deletions(-) diff --git a/clang/include/clang/Driver/Distro.h b/clang/include/clang/Driver/Distro.h index da8f819dee964..d382cf77a8b22 100644 --- a/clang/include/clang/Driver/Distro.h +++ b/clang/include/clang/Driver/Distro.h @@ -9,6 +9,7 @@ #ifndef LLVM_CLANG_DRIVER_DISTRO_H #define LLVM_CLANG_DRIVER_DISTRO_H +#include "llvm/ADT/Triple.h" #include "llvm/Support/VirtualFileSystem.h" namespace clang { @@ -84,7 +85,7 @@ class Distro { Distro(DistroType D) : DistroVal(D) {} /// Detects the distribution using specified VFS. - explicit Distro(llvm::vfs::FileSystem &VFS); + explicit Distro(llvm::vfs::FileSystem &VFS, const llvm::Triple &TargetOrHost); bool operator==(const Distro &Other) const { return DistroVal == Other.DistroVal; diff --git a/clang/lib/Driver/Distro.cpp b/clang/lib/Driver/Distro.cpp index 92e04108a7e29..06707fefc9d08 100644 --- a/clang/lib/Driver/Distro.cpp +++ b/clang/lib/Driver/Distro.cpp @@ -13,11 +13,28 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/ADT/Triple.h" using namespace clang::driver; using namespace clang; -static Distro::DistroType DetectDistro(llvm::vfs::FileSystem &VFS) { +static Distro::DistroType DetectDistro(llvm::vfs::FileSystem &VFS, + const llvm::Triple &TargetOrHost) { + // If we don't target Linux, no need to check the distro. This saves a few + // OS calls. + if (!TargetOrHost.isOSLinux()) + return Distro::UnknownDistro; + + // If the host is not running Linux, and we're backed by a real file system, + // no need to check the distro. 
This is the case where someone is + // cross-compiling from BSD or Windows to Linux, and it would be meaningless + // to try to figure out the "distro" of the non-Linux host. + IntrusiveRefCntPtr RealFS = + llvm::vfs::getRealFileSystem(); + llvm::Triple HostTriple(llvm::sys::getProcessTriple()); + if (!HostTriple.isOSLinux() && &VFS == RealFS.get()) + return Distro::UnknownDistro; + llvm::ErrorOr> File = VFS.getBufferForFile("/etc/lsb-release"); if (File) { @@ -149,4 +166,5 @@ static Distro::DistroType DetectDistro(llvm::vfs::FileSystem &VFS) { return Distro::UnknownDistro; } -Distro::Distro(llvm::vfs::FileSystem &VFS) : DistroVal(DetectDistro(VFS)) {} +Distro::Distro(llvm::vfs::FileSystem &VFS, const llvm::Triple &TargetOrHost) + : DistroVal(DetectDistro(VFS, TargetOrHost)) {} diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 26d13c7146701..03a6de812047b 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5619,7 +5619,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, TC.getTriple().isOSBinFormatCOFF()) && !TC.getTriple().isPS4() && !TC.getTriple().isOSNetBSD() && - !Distro(D.getVFS()).IsGentoo() && + !Distro(D.getVFS(), TC.getTriple()).IsGentoo() && !TC.getTriple().isAndroid() && TC.useIntegratedAs())) CmdArgs.push_back("-faddrsig"); diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 8c704a3078adc..02871d2ce411f 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -115,7 +115,8 @@ CudaInstallationDetector::CudaInstallationDetector( for (const char *Ver : Versions) Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver); - if (Distro(D.getVFS()).IsDebian() || Distro(D.getVFS()).IsUbuntu()) + Distro Dist(D.getVFS(), llvm::Triple(llvm::sys::getProcessTriple())); + if (Dist.IsDebian() || Dist.IsUbuntu()) // Special case for Debian to have nvidia-cuda-toolkit work // 
out of the box. More info on http://bugs.debian.org/882505 Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda"); diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index 087783875ffe2..736a2d435ca5c 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -240,7 +240,7 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) .str()); } - Distro Distro(D.getVFS()); + Distro Distro(D.getVFS(), Triple); if (Distro.IsAlpineLinux() || Triple.isAndroid()) { ExtraOpts.push_back("-z"); @@ -511,7 +511,7 @@ std::string Linux::getDynamicLinker(const ArgList &Args) const { const llvm::Triple::ArchType Arch = getArch(); const llvm::Triple &Triple = getTriple(); - const Distro Distro(getDriver().getVFS()); + const Distro Distro(getDriver().getVFS(), Triple); if (Triple.isAndroid()) return Triple.isArch64Bit() ? "/system/bin/linker64" : "/system/bin/linker"; diff --git a/clang/unittests/Driver/DistroTest.cpp b/clang/unittests/Driver/DistroTest.cpp index d0c86d1c54c9e..391c0baaadf5c 100644 --- a/clang/unittests/Driver/DistroTest.cpp +++ b/clang/unittests/Driver/DistroTest.cpp @@ -44,7 +44,7 @@ TEST(DistroTest, DetectUbuntu) { "SUPPORT_URL=\"http://help.ubuntu.com/\"\n" "BUG_REPORT_URL=\"http://bugs.launchpad.net/ubuntu/\"\n")); - Distro UbuntuTrusty{UbuntuTrustyFileSystem}; + Distro UbuntuTrusty{UbuntuTrustyFileSystem, llvm::Triple("unknown-pc-linux")}; ASSERT_EQ(Distro(Distro::UbuntuTrusty), UbuntuTrusty); ASSERT_TRUE(UbuntuTrusty.IsUbuntu()); ASSERT_FALSE(UbuntuTrusty.IsRedhat()); @@ -52,6 +52,9 @@ TEST(DistroTest, DetectUbuntu) { ASSERT_FALSE(UbuntuTrusty.IsDebian()); ASSERT_FALSE(UbuntuTrusty.IsGentoo()); + Distro UbuntuTrusty2{UbuntuTrustyFileSystem, llvm::Triple("unknown-pc-windows")}; + ASSERT_EQ(Distro(Distro::UnknownDistro), UbuntuTrusty2); + llvm::vfs::InMemoryFileSystem UbuntuYakketyFileSystem; UbuntuYakketyFileSystem.addFile("/etc/debian_version", 0, 
llvm::MemoryBuffer::getMemBuffer("stretch/sid\n")); @@ -74,7 +77,7 @@ TEST(DistroTest, DetectUbuntu) { "VERSION_CODENAME=yakkety\n" "UBUNTU_CODENAME=yakkety\n")); - Distro UbuntuYakkety{UbuntuYakketyFileSystem}; + Distro UbuntuYakkety{UbuntuYakketyFileSystem, llvm::Triple("unknown-pc-linux")}; ASSERT_EQ(Distro(Distro::UbuntuYakkety), UbuntuYakkety); ASSERT_TRUE(UbuntuYakkety.IsUbuntu()); ASSERT_FALSE(UbuntuYakkety.IsRedhat()); @@ -109,7 +112,7 @@ TEST(DistroTest, DetectRedhat) { "REDHAT_SUPPORT_PRODUCT=\"Fedora\"\n" "REDHAT_SUPPORT_PRODUCT_VERSION=25\n" "PRIVACY_POLICY_URL=https://fedoraproject.org/wiki/Legal:PrivacyPolicy\n")); - Distro Fedora25{Fedora25FileSystem}; + Distro Fedora25{Fedora25FileSystem, llvm::Triple("unknown-pc-linux")}; ASSERT_EQ(Distro(Distro::Fedora), Fedora25); ASSERT_FALSE(Fedora25.IsUbuntu()); ASSERT_TRUE(Fedora25.IsRedhat()); @@ -146,7 +149,7 @@ TEST(DistroTest, DetectRedhat) { "REDHAT_SUPPORT_PRODUCT=\"centos\"\n" "REDHAT_SUPPORT_PRODUCT_VERSION=\"7\"\n")); - Distro CentOS7{CentOS7FileSystem}; + Distro CentOS7{CentOS7FileSystem, llvm::Triple("unknown-pc-linux")}; ASSERT_EQ(Distro(Distro::RHEL7), CentOS7); ASSERT_FALSE(CentOS7.IsUbuntu()); ASSERT_TRUE(CentOS7.IsRedhat()); @@ -174,7 +177,7 @@ TEST(DistroTest, DetectOpenSUSE) { "HOME_URL=\"https://opensuse.org/\"\n" "ID_LIKE=\"suse\"\n")); - Distro OpenSUSELeap421{OpenSUSELeap421FileSystem}; + Distro OpenSUSELeap421{OpenSUSELeap421FileSystem, llvm::Triple("unknown-pc-linux")}; ASSERT_EQ(Distro(Distro::OpenSUSE), OpenSUSELeap421); ASSERT_FALSE(OpenSUSELeap421.IsUbuntu()); ASSERT_FALSE(OpenSUSELeap421.IsRedhat()); @@ -200,7 +203,7 @@ TEST(DistroTest, DetectOpenSUSE) { "HOME_URL=\"https://opensuse.org/\"\n" "ID_LIKE=\"suse\"\n")); - Distro OpenSUSE132{OpenSUSE132FileSystem}; + Distro OpenSUSE132{OpenSUSE132FileSystem, llvm::Triple("unknown-pc-linux")}; ASSERT_EQ(Distro(Distro::OpenSUSE), OpenSUSE132); ASSERT_FALSE(OpenSUSE132.IsUbuntu()); ASSERT_FALSE(OpenSUSE132.IsRedhat()); @@ -217,7 +220,7 @@ 
TEST(DistroTest, DetectOpenSUSE) { llvm::MemoryBuffer::getMemBuffer("LSB_VERSION=\"core-2.0-noarch:core-3.0-noarch:core-2.0-x86_64:core-3.0-x86_64\"\n")); // SLES10 is unsupported and therefore evaluates to unknown - Distro SLES10{SLES10FileSystem}; + Distro SLES10{SLES10FileSystem, llvm::Triple("unknown-pc-linux")}; ASSERT_EQ(Distro(Distro::UnknownDistro), SLES10); ASSERT_FALSE(SLES10.IsUbuntu()); ASSERT_FALSE(SLES10.IsRedhat()); @@ -240,7 +243,7 @@ TEST(DistroTest, DetectDebian) { "SUPPORT_URL=\"http://www.debian.org/support\"\n" "BUG_REPORT_URL=\"https://bugs.debian.org/\"\n")); - Distro DebianJessie{DebianJessieFileSystem}; + Distro DebianJessie{DebianJessieFileSystem, llvm::Triple("unknown-pc-linux")}; ASSERT_EQ(Distro(Distro::DebianJessie), DebianJessie); ASSERT_FALSE(DebianJessie.IsUbuntu()); ASSERT_FALSE(DebianJessie.IsRedhat()); @@ -259,7 +262,7 @@ TEST(DistroTest, DetectDebian) { "SUPPORT_URL=\"http://www.debian.org/support\"\n" "BUG_REPORT_URL=\"https://bugs.debian.org/\"\n")); - Distro DebianStretchSid{DebianStretchSidFileSystem}; + Distro DebianStretchSid{DebianStretchSidFileSystem, llvm::Triple("unknown-pc-linux")}; ASSERT_EQ(Distro(Distro::DebianStretch), DebianStretchSid); ASSERT_FALSE(DebianStretchSid.IsUbuntu()); ASSERT_FALSE(DebianStretchSid.IsRedhat()); @@ -281,7 +284,7 @@ TEST(DistroTest, DetectExherbo) { "SUPPORT_URL=\"irc://irc.freenode.net/#exherbo\"\n" "BUG_REPORT_URL=\"https://bugs.exherbo.org/\"\n")); - Distro Exherbo{ExherboFileSystem}; + Distro Exherbo{ExherboFileSystem, llvm::Triple("unknown-pc-linux")}; ASSERT_EQ(Distro(Distro::Exherbo), Exherbo); ASSERT_FALSE(Exherbo.IsUbuntu()); ASSERT_FALSE(Exherbo.IsRedhat()); @@ -303,7 +306,7 @@ TEST(DistroTest, DetectArchLinux) { "SUPPORT_URL=\"https://bbs.archlinux.org/\"\n" "BUG_REPORT_URL=\"https://bugs.archlinux.org/\"\n")); - Distro ArchLinux{ArchLinuxFileSystem}; + Distro ArchLinux{ArchLinuxFileSystem, llvm::Triple("unknown-pc-linux")}; ASSERT_EQ(Distro(Distro::ArchLinux), ArchLinux); 
ASSERT_FALSE(ArchLinux.IsUbuntu()); ASSERT_FALSE(ArchLinux.IsRedhat()); @@ -328,7 +331,7 @@ TEST(DistroTest, DetectGentoo) { "SUPPORT_URL=\"https://www.gentoo.org/support/\"\n" "BUG_REPORT_URL=\"https://bugs.gentoo.org/\"\n")); - Distro Gentoo{GentooFileSystem}; + Distro Gentoo{GentooFileSystem, llvm::Triple("unknown-pc-linux")}; ASSERT_EQ(Distro(Distro::Gentoo), Gentoo); ASSERT_FALSE(Gentoo.IsUbuntu()); ASSERT_FALSE(Gentoo.IsRedhat()); @@ -337,4 +340,57 @@ TEST(DistroTest, DetectGentoo) { ASSERT_TRUE(Gentoo.IsGentoo()); } +TEST(DistroTest, DetectWindowsAndCrossCompile) { + + class CountingFileSystem : public llvm::vfs::ProxyFileSystem { + public: + CountingFileSystem() : ProxyFileSystem(llvm::vfs::getRealFileSystem()) {} + + llvm::ErrorOr status(const llvm::Twine &Path) override { + ++Count; + return llvm::vfs::ProxyFileSystem::status(Path); + } + + llvm::ErrorOr> + openFileForRead(const llvm::Twine &Path) override { + ++Count; + return llvm::vfs::ProxyFileSystem::openFileForRead(Path); + } + + unsigned Count{}; + }; + + llvm::IntrusiveRefCntPtr RFS = + llvm::vfs::getRealFileSystem(); + llvm::Triple Host(llvm::sys::getProcessTriple()); + + CountingFileSystem CFileSystem; + Distro LinuxDistro{CFileSystem, llvm::Triple("unknown-pc-linux")}; + if (Host.isOSWindows()) { + ASSERT_EQ(Distro(Distro::UnknownDistro), LinuxDistro); + ASSERT_GT(CFileSystem.Count, 0U); + } + + Distro WinDistro{CFileSystem, llvm::Triple("unknown-pc-windows")}; + ASSERT_EQ(Distro(Distro::UnknownDistro), WinDistro); + ASSERT_GT(CFileSystem.Count, 0U); + + // When running on Windows along with a real file system, ensure that no + // distro is returned if targeting Linux + if (Host.isOSWindows()) { + Distro LinuxRealDistro{*RFS, llvm::Triple("unknown-pc-linux")}; + ASSERT_EQ(Distro(Distro::UnknownDistro), LinuxRealDistro); + } + // When running on Linux, check if the distro is the same as the host when + // targeting Linux + if (Host.isOSLinux()) { + Distro HostDistro{*RFS, Host}; + Distro 
LinuxRealDistro{*RFS, llvm::Triple("unknown-pc-linux")}; + ASSERT_EQ(HostDistro, LinuxRealDistro); + } + + Distro WinRealDistro{*RFS, llvm::Triple("unknown-pc-windows")}; + ASSERT_EQ(Distro(Distro::UnknownDistro), WinRealDistro); +} + } // end anonymous namespace From 4adddbd8ad4f71e8ce4cb4a38c755b126c5e9eff Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 28 Nov 2019 14:00:12 -0800 Subject: [PATCH 195/591] [LegacyPassManager] Simplify FunctionPass::assignPassManager And make it clear the parameter PreferredType is unused for FunctionPass. --- llvm/lib/IR/LegacyPassManager.cpp | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp index f3c13728d0cb8..90239bb762989 100644 --- a/llvm/lib/IR/LegacyPassManager.cpp +++ b/llvm/lib/IR/LegacyPassManager.cpp @@ -1786,36 +1786,32 @@ void ModulePass::assignPassManager(PMStack &PMS, /// Find appropriate Function Pass Manager or Call Graph Pass Manager /// in the PM Stack and add self into that manager. void FunctionPass::assignPassManager(PMStack &PMS, - PassManagerType PreferredType) { + PassManagerType /*PreferredType*/) { // Find Function Pass Manager - while (PMS.top()->getPassManagerType() > PMT_FunctionPassManager) + PMDataManager *PM; + while (PM = PMS.top(), PM->getPassManagerType() > PMT_FunctionPassManager) PMS.pop(); // Create new Function Pass Manager if needed. 
- FPPassManager *FPP; - if (PMS.top()->getPassManagerType() == PMT_FunctionPassManager) { - FPP = (FPPassManager *)PMS.top(); - } else { - PMDataManager *PMD = PMS.top(); - + if (PM->getPassManagerType() != PMT_FunctionPassManager) { // [1] Create new Function Pass Manager - FPP = new FPPassManager(); + auto *FPP = new FPPassManager; FPP->populateInheritedAnalysis(PMS); // [2] Set up new manager's top level manager - PMTopLevelManager *TPM = PMD->getTopLevelManager(); - TPM->addIndirectPassManager(FPP); + PM->getTopLevelManager()->addIndirectPassManager(FPP); // [3] Assign manager to manage this new manager. This may create // and push new managers into PMS - FPP->assignPassManager(PMS, PMD->getPassManagerType()); + FPP->assignPassManager(PMS, PM->getPassManagerType()); // [4] Push new manager into PMS PMS.push(FPP); + PM = FPP; } // Assign FPP as the manager of this pass. - FPP->add(this); + PM->add(this); } PassManagerBase::~PassManagerBase() {} From 4d3198e243fa450a4109fd72ae1999f1a13570fa Mon Sep 17 00:00:00 2001 From: Bryan Chan Date: Thu, 28 Nov 2019 02:46:18 -0500 Subject: [PATCH 196/591] [OpenMP] build offload plugins before testing them Summary: "make check-all" or "make check-libomptarget" would attempt to run offloading tests before the offload plugins are built. This patch corrects that by adding dependencies to the libomptarget CMake rules. 
Reviewers: jdoerfert Subscribers: mgorny, guansong, openmp-commits Tags: #openmp Differential Revision: https://reviews.llvm.org/D70803 --- openmp/libomptarget/CMakeLists.txt | 1 + openmp/libomptarget/plugins/CMakeLists.txt | 6 ++++++ openmp/libomptarget/test/CMakeLists.txt | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/openmp/libomptarget/CMakeLists.txt b/openmp/libomptarget/CMakeLists.txt index a953662bf8b4d..c1bc29faaf45d 100644 --- a/openmp/libomptarget/CMakeLists.txt +++ b/openmp/libomptarget/CMakeLists.txt @@ -39,6 +39,7 @@ set (LIBOMPTARGET_ALL_TARGETS "${LIBOMPTARGET_ALL_TARGETS} nvptx64-nvidia-cuda") # Once the plugins for the different targets are validated, they will be added to # the list of supported targets in the current system. set (LIBOMPTARGET_SYSTEM_TARGETS "") +set (LIBOMPTARGET_TESTED_PLUGINS "") # Check whether using debug mode. In debug mode, allow dumping progress # messages at runtime by default. Otherwise, it can be enabled diff --git a/openmp/libomptarget/plugins/CMakeLists.txt b/openmp/libomptarget/plugins/CMakeLists.txt index f8048ba69c01f..bb3f9c908087a 100644 --- a/openmp/libomptarget/plugins/CMakeLists.txt +++ b/openmp/libomptarget/plugins/CMakeLists.txt @@ -45,9 +45,14 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "${tmachine}$") dl "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exports") + list(APPEND LIBOMPTARGET_TESTED_PLUGINS + "omptarget.rtl.${tmachine_libname}") + # Report to the parent scope that we are building a plugin. set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS} ${tmachine_triple}" PARENT_SCOPE) + set(LIBOMPTARGET_TESTED_PLUGINS + "${LIBOMPTARGET_TESTED_PLUGINS}" PARENT_SCOPE) else(LIBOMPTARGET_DEP_LIBFFI_FOUND) libomptarget_say("Not building ${tmachine_name} offloading plugin: libffi dependency not found.") @@ -68,4 +73,5 @@ add_subdirectory(x86_64) # Make sure the parent scope can see the plugins that will be created. 
set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS}" PARENT_SCOPE) +set(LIBOMPTARGET_TESTED_PLUGINS "${LIBOMPTARGET_TESTED_PLUGINS}" PARENT_SCOPE) diff --git a/openmp/libomptarget/test/CMakeLists.txt b/openmp/libomptarget/test/CMakeLists.txt index 607801e50e5bb..aa3fffcfe60ef 100644 --- a/openmp/libomptarget/test/CMakeLists.txt +++ b/openmp/libomptarget/test/CMakeLists.txt @@ -12,7 +12,7 @@ else() set(LIBOMPTARGET_DEBUG False) endif() -add_openmp_testsuite(check-libomptarget "Running libomptarget tests" ${CMAKE_CURRENT_BINARY_DIR} DEPENDS omptarget omp) +add_openmp_testsuite(check-libomptarget "Running libomptarget tests" ${CMAKE_CURRENT_BINARY_DIR} DEPENDS omptarget omp ${LIBOMPTARGET_TESTED_PLUGINS}) # Configure the lit.site.cfg.in file set(AUTO_GEN_COMMENT "## Autogenerated by libomptarget configuration.\n# Do not edit!") From ca818f45500800b2840d080a875818e5224c68e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20S=C3=A9chet?= Date: Fri, 22 Nov 2019 23:39:18 +0100 Subject: [PATCH 197/591] [DAGCombiner] Peek through vector concats when trying to combine shuffles. Summary: This combine showed up as needed when exploring the regression when processing the DAG in topological order. 
Reviewers: craig.topper, efriedma, RKSimon, lebedev.ri Subscribers: llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D68195 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 51 ++++++++++++++---- .../CodeGen/X86/vector-shuffle-combining.ll | 54 +++++++------------ 2 files changed, 61 insertions(+), 44 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e6844e556b11d..6a62bf15929c1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16696,11 +16696,15 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { /// Convert a disguised subvector insertion into a shuffle: SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { + assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && + "Expected extract_vector_elt"); SDValue InsertVal = N->getOperand(1); SDValue Vec = N->getOperand(0); - // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), InsIndex) - // --> (vector_shuffle X, Y) + // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), + // InsIndex) + // --> (vector_shuffle X, Y) and variations where shuffle operands may be + // CONCAT_VECTORS. if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() && InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && isa(InsertVal.getOperand(1))) { @@ -16713,18 +16717,47 @@ SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { // Vec's operand 0 is using indices from 0 to N-1 and // operand 1 from N to 2N - 1, where N is the number of // elements in the vectors. 
- int XOffset = -1; - if (InsertVal.getOperand(0) == X) { - XOffset = 0; - } else if (InsertVal.getOperand(0) == Y) { - XOffset = X.getValueType().getVectorNumElements(); + SDValue InsertVal0 = InsertVal.getOperand(0); + int ElementOffset = -1; + + // We explore the inputs of the shuffle in order to see if we find the + // source of the extract_vector_elt. If so, we can use it to modify the + // shuffle rather than perform an insert_vector_elt. + SmallVector, 8> ArgWorkList; + ArgWorkList.emplace_back(Mask.size(), Y); + ArgWorkList.emplace_back(0, X); + + while (!ArgWorkList.empty()) { + int ArgOffset; + SDValue ArgVal; + std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val(); + + if (ArgVal == InsertVal0) { + ElementOffset = ArgOffset; + break; + } + + // Peek through concat_vector. + if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) { + int CurrentArgOffset = + ArgOffset + ArgVal.getValueType().getVectorNumElements(); + int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements(); + for (SDValue Op : reverse(ArgVal->ops())) { + CurrentArgOffset -= Step; + ArgWorkList.emplace_back(CurrentArgOffset, Op); + } + + // Make sure we went through all the elements and did not screw up index + // computation. 
+ assert(CurrentArgOffset == ArgOffset); + } } - if (XOffset != -1) { + if (ElementOffset != -1) { SmallVector NewMask(Mask.begin(), Mask.end()); auto *ExtrIndex = cast(InsertVal.getOperand(1)); - NewMask[InsIndex] = XOffset + ExtrIndex->getZExtValue(); + NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue(); assert(NewMask[InsIndex] < (int)(2 * Vec.getValueType().getVectorNumElements()) && NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound"); diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll index b5dff70e234e4..8cf8cab8b79b1 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll @@ -2914,56 +2914,40 @@ define <8 x i16> @shuffle_extract_insert_double(<8 x i16> %a, <8 x i16> %b) { define <8 x i16> @shuffle_extract_concat_insert(<4 x i16> %lhsa, <4 x i16> %rhsa, <8 x i16> %b) { ; SSE2-LABEL: shuffle_extract_concat_insert: ; SSE2: # %bb.0: -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: pextrw $2, %xmm1, %ecx -; SSE2-NEXT: pextrw $5, %xmm2, %edx -; SSE2-NEXT: pextrw $7, %xmm2, %esi -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,0,3,4,5,6,7] -; SSE2-NEXT: pinsrw $4, %ecx, %xmm0 -; SSE2-NEXT: pinsrw $5, %edx, %xmm0 -; SSE2-NEXT: pinsrw $6, %eax, %xmm0 -; SSE2-NEXT: pinsrw $7, %esi, %xmm0 +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,3,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,6,7] +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} 
xmm1 = xmm1[0,1,3,2,4,5,6,7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: shuffle_extract_concat_insert: ; SSSE3: # %bb.0: -; SSSE3-NEXT: pextrw $2, %xmm1, %eax -; SSSE3-NEXT: pextrw $5, %xmm2, %ecx -; SSSE3-NEXT: pextrw $7, %xmm2, %edx -; SSSE3-NEXT: movd %xmm1, %esi +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,8,9,12,13,12,13,14,15] +; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0,1,6,7,10,11,14,15,14,15,10,11,12,13,14,15] ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,2,3,0,1,14,15,8,9,14,15,12,13,14,15] -; SSSE3-NEXT: pinsrw $4, %eax, %xmm0 -; SSSE3-NEXT: pinsrw $5, %ecx, %xmm0 -; SSSE3-NEXT: pinsrw $6, %esi, %xmm0 -; SSSE3-NEXT: pinsrw $7, %edx, %xmm0 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: shuffle_extract_concat_insert: ; SSE41: # %bb.0: -; SSE41-NEXT: movd %xmm1, %eax -; SSE41-NEXT: pextrw $2, %xmm1, %ecx +; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,8,9,12,13,12,13,14,15] +; SSE41-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0,1,6,7,10,11,14,15,14,15,10,11,12,13,14,15] ; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,2,3,0,1,14,15,8,9,14,15,12,13,14,15] -; SSE41-NEXT: pinsrw $4, %ecx, %xmm0 -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm2[5],xmm0[6,7] -; SSE41-NEXT: pinsrw $6, %eax, %xmm0 -; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] ; SSE41-NEXT: retq ; ; AVX-LABEL: shuffle_extract_concat_insert: ; AVX: # %bb.0: -; AVX-NEXT: vmovd %xmm1, %eax -; AVX-NEXT: vpextrw $2, %xmm1, %ecx -; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; AVX-NEXT: vpshufb 
{{.*#+}} xmm0 = xmm0[8,9,2,3,0,1,14,15,8,9,14,15,12,13,14,15] -; AVX-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 -; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm2[5],xmm0[6,7] -; AVX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 -; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7] +; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,8,9,12,13,12,13,14,15] +; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm2[0,1,6,7,10,11,14,15,14,15,10,11,12,13,14,15] +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; AVX-NEXT: retq %a = shufflevector <4 x i16> %lhsa, <4 x i16> %rhsa, <8 x i32> %a0 = extractelement <8 x i16> %a, i32 0 From 2485fa7739c7ecda629b51f9936457e2aaee2420 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 28 Nov 2019 10:40:50 -0800 Subject: [PATCH 198/591] [LegalizeTypes] Use SoftenFloatRes_Unary in SoftenFloatRes_FCBRT to reduce code. We don't have a STRICT_CBRT ISD opcode, but we can still use SoftenFloatRes_Unary to simplify some code. 
--- llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 582ec001cc711..c1b2f8edcdb92 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -267,18 +267,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FCBRT(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::CBRT_F32, RTLIB::CBRT_F64, RTLIB::CBRT_F80, RTLIB::CBRT_F128, - RTLIB::CBRT_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + RTLIB::CBRT_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { From 68ddf434c06e4a17947750de60fe4ab60315f6ad Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 28 Nov 2019 11:36:55 -0800 Subject: [PATCH 199/591] [LegalizeTypes] In SoftenFloatRes_FNEG, always generate integer arithmetic, never fall back to using fsub. We would previously fallback if the type wasn't f32/f64/f128. But I don't think any of the other floating point types ever go through the softening code anyway. So this code is dead. 
--- .../SelectionDAG/LegalizeFloatTypes.cpp | 23 ++++--------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index c1b2f8edcdb92..dba715bfa06e0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -452,25 +452,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); - EVT FloatVT = N->getValueType(0); - if (FloatVT == MVT::f32 || FloatVT == MVT::f64 || FloatVT == MVT::f128) { - // Expand Y = FNEG(X) -> Y = X ^ sign mask - APInt SignMask = APInt::getSignMask(NVT.getSizeInBits()); - return DAG.getNode(ISD::XOR, dl, NVT, GetSoftenedFloat(N->getOperand(0)), - DAG.getConstant(SignMask, dl, NVT)); - } - - // Expand Y = FNEG(X) -> Y = SUB -0.0, X - SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)), - GetSoftenedFloat(N->getOperand(0)) }; - TargetLowering::MakeLibCallOptions CallOptions; - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::SUB_F32, - RTLIB::SUB_F64, - RTLIB::SUB_F80, - RTLIB::SUB_F128, - RTLIB::SUB_PPCF128), - NVT, Ops, CallOptions, dl).first; + // Expand Y = FNEG(X) -> Y = X ^ sign mask + APInt SignMask = APInt::getSignMask(NVT.getSizeInBits()); + return DAG.getNode(ISD::XOR, dl, NVT, GetSoftenedFloat(N->getOperand(0)), + DAG.getConstant(SignMask, dl, NVT)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { From 2f3e8cb313583c160d08564b6348dd6f961f2237 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 28 Nov 2019 15:28:27 -0800 Subject: [PATCH 200/591] [LegalizeTypes] Add strict FP support to SoftenFloatRes_FP_ROUND. Fix mistake in SoftenFloatRes_FP_EXTEND. These will be needed for ARM fp-instrinsics.ll which is currently XFAILed. 
One of the getOperand calls in SoftenFloatRes_FP_EXTEND was not taking strict FP into account. It only affected the call to setTypeListBeforeSoften which only has an effect on some targets. --- .../SelectionDAG/LegalizeFloatTypes.cpp | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index dba715bfa06e0..a8f038227bfbc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -493,8 +493,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + EVT OpVT = N->getOperand(IsStrict ? 1 : 0).getValueType(); + CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); std::pair Tmp = TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N), Chain); @@ -523,14 +523,21 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { + bool IsStrict = N->isStrictFPOpcode(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = N->getOperand(0); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; + EVT OpVT = N->getOperand(IsStrict ? 1 : 0).getValueType(); + CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); + std::pair Tmp = TLI.makeLibCall(DAG, LC, NVT, Op, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { From 5190cf8ffd9a7878d79fd92a5e682891f0693377 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Thu, 28 Nov 2019 19:30:21 -0500 Subject: [PATCH 201/591] gn build: Add a toggle for building against the commandline tools SDK on macOS --- llvm/utils/gn/build/mac_sdk.gni | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/llvm/utils/gn/build/mac_sdk.gni b/llvm/utils/gn/build/mac_sdk.gni index 7999042a7ab5e..8fa75647afd18 100644 --- a/llvm/utils/gn/build/mac_sdk.gni +++ b/llvm/utils/gn/build/mac_sdk.gni @@ -1,4 +1,15 @@ +declare_args() { + # Set to true if you don't have Xcode installed, but do have the commandline + # tools. + mac_use_commandline_tools_sdk = false +} + # Location of the mac sdk. -# If that's not fixed, might want to shell out to xcrun at gn time to -# retrieve this, but for now this seems to do the trick. -mac_sdk_path = "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk" +# The correct way to do this is to call xcrun (https://reviews.llvm.org/D70835), +# but that makes `gn gen` take twice as long and almost everyone has Xcode +# installed. So require that people who don't have it installed set a gn arg. 
+if (mac_use_commandline_tools_sdk) { + mac_sdk_path = "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk" +} else { + mac_sdk_path = "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk" +} From b0e979724f2679e4e6f5b824144ea89289bd6d56 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 28 Nov 2019 16:42:27 -0800 Subject: [PATCH 202/591] [PassInstrumentation] Remove excess newline for the new pass manager This also removes excess newline for the legacy pass manager when -filter-print-funcs is specified. --- llvm/lib/IR/AsmWriter.cpp | 3 --- llvm/lib/IR/IRPrintingPasses.cpp | 2 +- llvm/test/Other/2010-05-06-Printer.ll | 1 - llvm/test/Other/printer.ll | 35 ++++++++++++--------------- 4 files changed, 16 insertions(+), 25 deletions(-) diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 01989b97f7fa0..f9d4b181f862b 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -3400,9 +3400,6 @@ void AssemblyWriter::printTypeIdentities() { /// printFunction - Print all aspects of a function. void AssemblyWriter::printFunction(const Function *F) { - // Print out the return type and name. 
- Out << '\n'; - if (AnnotationWriter) AnnotationWriter->emitFunctionAnnot(F, Out); if (F->isMaterializable()) diff --git a/llvm/lib/IR/IRPrintingPasses.cpp b/llvm/lib/IR/IRPrintingPasses.cpp index 8fa97a3aecb73..03657ff8d9d43 100644 --- a/llvm/lib/IR/IRPrintingPasses.cpp +++ b/llvm/lib/IR/IRPrintingPasses.cpp @@ -57,7 +57,7 @@ PreservedAnalyses PrintFunctionPass::run(Function &F, if (forcePrintModuleIR()) OS << Banner << " (function: " << F.getName() << ")\n" << *F.getParent(); else - OS << Banner << static_cast(F); + OS << Banner << '\n' << static_cast(F); } return PreservedAnalyses::all(); } diff --git a/llvm/test/Other/2010-05-06-Printer.ll b/llvm/test/Other/2010-05-06-Printer.ll index 9e7c9cb6ab4a8..decd977c3d212 100644 --- a/llvm/test/Other/2010-05-06-Printer.ll +++ b/llvm/test/Other/2010-05-06-Printer.ll @@ -16,6 +16,5 @@ define void @foo(){ ;ALL: ModuleID = ;FOO: IR Dump After -;FOO-EMPTY: ;FOO-NEXT: define void @foo() ;FOO-NOT: define void @tester diff --git a/llvm/test/Other/printer.ll b/llvm/test/Other/printer.ll index 9785a17b2280a..8633765628550 100644 --- a/llvm/test/Other/printer.ll +++ b/llvm/test/Other/printer.ll @@ -1,5 +1,7 @@ -; RUN: opt -mem2reg -instcombine -print-after-all -disable-output < %s 2>&1 | FileCheck %s -; RUN: opt -passes='mem2reg,instcombine' -print-after-all -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -mem2reg -instcombine -print-after-all -disable-output < %s 2>&1 | \ +; RUN: FileCheck --check-prefixes=CHECK,OLDPM %s --implicit-check-not='IR Dump' +; RUN: opt -passes='mem2reg,instcombine' -print-after-all -disable-output < %s 2>&1 | \ +; RUN: FileCheck --check-prefixes=CHECK,NEWPM %s --implicit-check-not='IR Dump' define void @tester(){ ret void } @@ -8,21 +10,14 @@ define void @foo(){ ret void } -;CHECK-NOT: IR Dump After PassManager -;CHECK-NOT: IR Dump After ModuleToFunctionPassAdaptor -; -;CHECK: *** IR Dump After {{Promote Memory to Register|PromotePass}} -;CHECK: define void @tester -;CHECK-NOT: define void @foo 
-;CHECK: *** IR Dump After {{Combine redundant instructions|InstCombinePass}} -;CHECK: define void @tester -;CHECK-NOT: define void @foo -;CHECK: *** IR Dump After {{Promote Memory to Register|PromotePass}} -;CHECK: define void @foo -;CHECK-NOT: define void @tester -;CHECK: *** IR Dump After {{Combine redundant instructions|InstCombinePass}} -;CHECK: define void @foo -;CHECK-NOT: define void @tester -;CHECK: *** IR Dump After {{Module Verifier|VerifierPass}} -; -;CHECK-NOT: IR Dump After Print Module IR +; NEWPM: *** IR Dump After VerifierPass +; CHECK: *** IR Dump After {{Promote Memory to Register|PromotePass}} +; CHECK-NEXT: define void @tester +; CHECK: *** IR Dump After {{Combine redundant instructions|InstCombinePass}} +; CHECK-NEXT: define void @tester +; OLDPM: *** IR Dump After Module Verifier +; CHECK: *** IR Dump After {{Promote Memory to Register|PromotePass}} +; CHECK-NEXT: define void @foo +; CHECK: *** IR Dump After {{Combine redundant instructions|InstCombinePass}} +; CHECK-NEXT: define void @foo +; CHECK: *** IR Dump After {{Module Verifier|VerifierPass}} From dfedae50018f321cb2189a95936511b2506e5bce Mon Sep 17 00:00:00 2001 From: Hideto Ueno Date: Fri, 29 Nov 2019 06:45:07 +0000 Subject: [PATCH 203/591] [Attributor] Remove dereferenceable_or_null when nonull is present Summary: This patch prevents the simultaneous presence of `dereferenceable` and `dereferenceable_or_null` attribute Reviewers: jdoerfert, sstefan1 Reviewed By: jdoerfert Subscribers: lebedev.ri, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70789 --- llvm/lib/Transforms/IPO/Attributor.cpp | 10 ++++++++++ llvm/test/Transforms/FunctionAttrs/dereferenceable.ll | 9 +++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index faf0cdfd08ed3..e73698620de64 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ 
-2971,6 +2971,16 @@ struct AADereferenceableImpl : AADereferenceable { return TrackUse; } + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + ChangeStatus Change = AADereferenceable::manifest(A); + if (isAssumedNonNull() && hasAttr(Attribute::DereferenceableOrNull)) { + removeAttrs({Attribute::DereferenceableOrNull}); + return ChangeStatus::CHANGED; + } + return Change; + } + void getDeducedAttributes(LLVMContext &Ctx, SmallVectorImpl &Attrs) const override { // TODO: Add *_globally support diff --git a/llvm/test/Transforms/FunctionAttrs/dereferenceable.ll b/llvm/test/Transforms/FunctionAttrs/dereferenceable.ll index 1c285fa288370..951b5047747f0 100644 --- a/llvm/test/Transforms/FunctionAttrs/dereferenceable.ll +++ b/llvm/test/Transforms/FunctionAttrs/dereferenceable.ll @@ -30,8 +30,7 @@ define i32* @test3_1(i32* dereferenceable(8) %0) local_unnamed_addr { } define i32* @test3_2(i32* dereferenceable_or_null(32) %0) local_unnamed_addr { -; FIXME: We should not have both deref(x) and deref_or_null(y) with x >= y. 
-; ATTRIBUTOR: define nonnull dereferenceable(16) i32* @test3_2(i32* nofree nonnull readnone dereferenceable(32) dereferenceable_or_null(32) "no-capture-maybe-returned" %0) +; ATTRIBUTOR: define nonnull dereferenceable(16) i32* @test3_2(i32* nofree nonnull readnone dereferenceable(32) "no-capture-maybe-returned" %0) %ret = getelementptr inbounds i32, i32* %0, i64 4 ret i32* %ret } @@ -202,3 +201,9 @@ define i32* @test_for_minus_index(i32* %p) { store i32 1, i32* %q ret i32* %q } + +define void @deref_or_null_and_nonnull(i32* dereferenceable_or_null(100) %0) { +; ATTRIBUTOR: define void @deref_or_null_and_nonnull(i32* nocapture nofree nonnull writeonly dereferenceable(100) %0) + store i32 1, i32* %0 + ret void +} From 6c742fdbf48ee3ae9afb2ab1568397a3b89276e5 Mon Sep 17 00:00:00 2001 From: Hideto Ueno Date: Fri, 29 Nov 2019 06:55:58 +0000 Subject: [PATCH 204/591] [Attributor] Deduce dereferenceable based on accessed bytes map Summary: This patch introduces the deduction based on load/store instructions whose pointer operand is a non-inbounds GEP instruction. For example if we have, ``` void f(int *u){ u[0] = 0; u[1] = 1; u[2] = 2; } ``` then u must be dereferenceable(12). 
This patch is inspired by D64258 Reviewers: jdoerfert, spatel, hfinkel, RKSimon, sstefan1, xbolva00, dtemirbulatov Reviewed By: jdoerfert Subscribers: jfb, lebedev.ri, xbolva00, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70714 --- llvm/include/llvm/Transforms/IPO/Attributor.h | 47 +++++++++++++++++++ llvm/lib/Transforms/IPO/Attributor.cpp | 22 +++++++++ .../InferFunctionAttrs/dereferenceable.ll | 43 ++++++++++++++++- 3 files changed, 110 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index a75a047b7fd0d..4f6f823a230b7 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -1820,6 +1820,42 @@ struct DerefState : AbstractState { /// State representing for dereferenceable bytes. IncIntegerState<> DerefBytesState; + /// Map representing for accessed memory offsets and sizes. + /// A key is Offset and a value is size. + /// If there is a load/store instruction something like, + /// p[offset] = v; + /// (offset, sizeof(v)) will be inserted to this map. + /// std::map is used because we want to iterate keys in ascending order. + std::map AccessedBytesMap; + + /// Helper function to calculate dereferenceable bytes from current known + /// bytes and accessed bytes. + /// + /// int f(int *A){ + /// *A = 0; + /// *(A+2) = 2; + /// *(A+1) = 1; + /// *(A+10) = 10; + /// } + /// ``` + /// In that case, AccessedBytesMap is `{0:4, 4:4, 8:4, 40:4}`. + /// AccessedBytesMap is std::map so it is iterated in accending order on + /// key(Offset). 
So KnownBytes will be updated like this: |Access | KnownBytes + /// |(0, 4)| 0 -> 4 + /// |(4, 4)| 4 -> 8 + /// |(8, 4)| 8 -> 12 + /// |(40, 4) | 12 (break) + void computeKnownDerefBytesFromAccessedMap() { + int64_t KnownBytes = DerefBytesState.getKnown(); + for (auto &Access : AccessedBytesMap) { + if (KnownBytes < Access.first) + break; + KnownBytes = std::max(KnownBytes, Access.first + (int64_t)Access.second); + } + + DerefBytesState.takeKnownMaximum(KnownBytes); + } + /// State representing that whether the value is globaly dereferenceable. BooleanState GlobalState; @@ -1849,6 +1885,9 @@ struct DerefState : AbstractState { /// Update known dereferenceable bytes. void takeKnownDerefBytesMaximum(uint64_t Bytes) { DerefBytesState.takeKnownMaximum(Bytes); + + // Known bytes might increase. + computeKnownDerefBytesFromAccessedMap(); } /// Update assumed dereferenceable bytes. @@ -1856,6 +1895,14 @@ struct DerefState : AbstractState { DerefBytesState.takeAssumedMinimum(Bytes); } + /// Add accessed bytes to the map. + void addAccessedBytes(int64_t Offset, uint64_t Size) { + AccessedBytesMap[Offset] = std::max(AccessedBytesMap[Offset], Size); + + // Known bytes might increase. + computeKnownDerefBytesFromAccessedMap(); + } + /// Equality for DerefState. 
bool operator==(const DerefState &R) { return this->DerefBytesState == R.DerefBytesState && diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index e73698620de64..48da7e7bdd03c 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -2961,12 +2961,34 @@ struct AADereferenceableImpl : AADereferenceable { const StateType &getState() const override { return *this; } /// } + /// Helper function for collecting accessed bytes in must-be-executed-context + void addAccessedBytesForUse(Attributor &A, const Use *U, + const Instruction *I) { + const Value *UseV = U->get(); + if (!UseV->getType()->isPointerTy()) + return; + + Type *PtrTy = UseV->getType(); + const DataLayout &DL = A.getDataLayout(); + int64_t Offset; + if (const Value *Base = getBasePointerOfAccessPointerOperand( + I, Offset, DL, /*AllowNonInbounds*/ true)) { + if (Base == &getAssociatedValue() && getPointerOperand(I) == UseV) { + uint64_t Size = DL.getTypeStoreSize(PtrTy->getPointerElementType()); + addAccessedBytes(Offset, Size); + } + } + return; + } + /// See AAFromMustBeExecutedContext bool followUse(Attributor &A, const Use *U, const Instruction *I) { bool IsNonNull = false; bool TrackUse = false; int64_t DerefBytes = getKnownNonNullAndDerefBytesForUse( A, *this, getAssociatedValue(), U, I, IsNonNull, TrackUse); + + addAccessedBytesForUse(A, U, I); takeKnownDerefBytesMaximum(DerefBytes); return TrackUse; } diff --git a/llvm/test/Transforms/InferFunctionAttrs/dereferenceable.ll b/llvm/test/Transforms/InferFunctionAttrs/dereferenceable.ll index bf7daba40d430..b6b699fac85ea 100644 --- a/llvm/test/Transforms/InferFunctionAttrs/dereferenceable.ll +++ b/llvm/test/Transforms/InferFunctionAttrs/dereferenceable.ll @@ -48,8 +48,7 @@ define double @PR21780_only_access3_without_inbounds(double* %ptr) { define double @PR21780_without_inbounds(double* %ptr) { ; CHECK-LABEL: @PR21780_without_inbounds(double* %ptr) -; FIXME: this 
should be @PR21780_without_inbounds(double* nonnull dereferenceable(32) %ptr) -; ATTRIBUTOR-LABEL: @PR21780_without_inbounds(double* nocapture nofree nonnull readonly align 8 dereferenceable(8) %ptr) +; ATTRIBUTOR-LABEL: @PR21780_without_inbounds(double* nocapture nofree nonnull readonly align 8 dereferenceable(32) %ptr) %arrayidx1 = getelementptr double, double* %ptr, i64 1 %arrayidx2 = getelementptr double, double* %ptr, i64 2 @@ -67,6 +66,7 @@ define double @PR21780_without_inbounds(double* %ptr) { define void @gep0(i8* %unused, i8* %other, i8* %ptr) { ; CHECK-LABEL: @gep0(i8* %unused, i8* %other, i8* %ptr) +; ATTRIBUTOR-LABEL: @gep0(i8* nocapture nofree readnone %unused, i8* nocapture nofree nonnull writeonly dereferenceable(1) %other, i8* nocapture nofree nonnull readonly dereferenceable(3) %ptr) %arrayidx0 = getelementptr i8, i8* %ptr, i64 0 %arrayidx1 = getelementptr i8, i8* %ptr, i64 1 %arrayidx2 = getelementptr i8, i8* %ptr, i64 2 @@ -82,6 +82,7 @@ define void @gep0(i8* %unused, i8* %other, i8* %ptr) { define void @ordering(i8* %ptr1, i32* %ptr2) { ; CHECK-LABEL: @ordering(i8* %ptr1, i32* %ptr2) +; ATTRIBUTOR-LABEL: @ordering(i8* nocapture nofree nonnull readonly dereferenceable(3) %ptr1, i32* nocapture nofree nonnull readonly dereferenceable(8) %ptr2) %a20 = getelementptr i32, i32* %ptr2, i64 0 %a12 = getelementptr i8, i8* %ptr1, i64 2 %t12 = load i8, i8* %a12 @@ -99,6 +100,7 @@ define void @ordering(i8* %ptr1, i32* %ptr2) { define void @not_entry_but_guaranteed_to_execute(i8* %ptr) { ; CHECK-LABEL: @not_entry_but_guaranteed_to_execute(i8* %ptr) +; ATTRIBUTOR-LABEL: @not_entry_but_guaranteed_to_execute(i8* nocapture nofree nonnull readonly dereferenceable(3) %ptr) entry: br label %exit exit: @@ -115,6 +117,7 @@ exit: define void @not_entry_not_guaranteed_to_execute(i8* %ptr, i1 %cond) { ; CHECK-LABEL: @not_entry_not_guaranteed_to_execute(i8* %ptr, i1 %cond) +; ATTRIBUTOR-LABEL: @not_entry_not_guaranteed_to_execute(i8* nocapture nofree readonly %ptr, i1 
%cond) entry: br i1 %cond, label %loads, label %exit loads: @@ -133,6 +136,7 @@ exit: define void @partial_in_entry(i16* %ptr, i1 %cond) { ; CHECK-LABEL: @partial_in_entry(i16* %ptr, i1 %cond) +; ATTRIBUTOR-LABEL: @partial_in_entry(i16* nocapture nofree nonnull readonly dereferenceable(4) %ptr, i1 %cond) entry: %arrayidx0 = getelementptr i16, i16* %ptr, i64 0 %arrayidx1 = getelementptr i16, i16* %ptr, i64 1 @@ -152,6 +156,7 @@ exit: define void @volatile_is_not_dereferenceable(i16* %ptr) { ; CHECK-LABEL: @volatile_is_not_dereferenceable(i16* %ptr) +; ATTRIBUTOR-LABEL: @volatile_is_not_dereferenceable(i16* nofree %ptr) %arrayidx0 = getelementptr i16, i16* %ptr, i64 0 %arrayidx1 = getelementptr i16, i16* %ptr, i64 1 %arrayidx2 = getelementptr i16, i16* %ptr, i64 2 @@ -165,6 +170,7 @@ define void @volatile_is_not_dereferenceable(i16* %ptr) { define void @atomic_is_alright(i16* %ptr) { ; CHECK-LABEL: @atomic_is_alright(i16* %ptr) +; ATTRIBUTOR-LABEL: @atomic_is_alright(i16* nocapture nofree nonnull readonly align 2 dereferenceable(6) %ptr) %arrayidx0 = getelementptr i16, i16* %ptr, i64 0 %arrayidx1 = getelementptr i16, i16* %ptr, i64 1 %arrayidx2 = getelementptr i16, i16* %ptr, i64 2 @@ -178,6 +184,7 @@ declare void @may_not_return() define void @not_guaranteed_to_transfer_execution(i16* %ptr) { ; CHECK-LABEL: @not_guaranteed_to_transfer_execution(i16* %ptr) +; ATTRIBUTOR-LABEL: @not_guaranteed_to_transfer_execution(i16* nocapture nonnull readonly dereferenceable(2) %ptr) %arrayidx0 = getelementptr i16, i16* %ptr, i64 0 %arrayidx1 = getelementptr i16, i16* %ptr, i64 1 %arrayidx2 = getelementptr i16, i16* %ptr, i64 2 @@ -192,6 +199,7 @@ define void @not_guaranteed_to_transfer_execution(i16* %ptr) { define void @variable_gep_index(i8* %unused, i8* %ptr, i64 %variable_index) { ; CHECK-LABEL: @variable_gep_index(i8* %unused, i8* %ptr, i64 %variable_index) +; ATTRIBUTOR-LABEL: @variable_gep_index(i8* nocapture nofree readnone %unused, i8* nocapture nofree nonnull readonly 
dereferenceable(1) %ptr, i64 %variable_index) %arrayidx1 = getelementptr i8, i8* %ptr, i64 %variable_index %arrayidx2 = getelementptr i8, i8* %ptr, i64 2 %t0 = load i8, i8* %ptr @@ -204,6 +212,8 @@ define void @variable_gep_index(i8* %unused, i8* %ptr, i64 %variable_index) { define void @multi_index_gep(<4 x i8>* %ptr) { ; CHECK-LABEL: @multi_index_gep(<4 x i8>* %ptr) +; FIXME: %ptr should be dereferenceable(4) +; ATTRIBUTOR-LABEL: @multi_index_gep(<4 x i8>* nocapture nofree nonnull readonly dereferenceable(1) %ptr) %arrayidx00 = getelementptr <4 x i8>, <4 x i8>* %ptr, i64 0, i64 0 %t0 = load i8, i8* %arrayidx00 ret void @@ -213,6 +223,7 @@ define void @multi_index_gep(<4 x i8>* %ptr) { define void @not_byte_multiple(i9* %ptr) { ; CHECK-LABEL: @not_byte_multiple(i9* %ptr) +; ATTRIBUTOR-LABEL: @not_byte_multiple(i9* nocapture nofree nonnull readonly dereferenceable(2) %ptr) %arrayidx0 = getelementptr i9, i9* %ptr, i64 0 %t0 = load i9, i9* %arrayidx0 ret void @@ -222,6 +233,7 @@ define void @not_byte_multiple(i9* %ptr) { define void @no_pointer_deref(i16* %ptr) { ; CHECK-LABEL: @no_pointer_deref(i16* %ptr) +; ATTRIBUTOR-LABEL: @no_pointer_deref(i16* nocapture nofree readonly %ptr) %arrayidx1 = getelementptr i16, i16* %ptr, i64 1 %arrayidx2 = getelementptr i16, i16* %ptr, i64 2 %t1 = load i16, i16* %arrayidx1 @@ -233,6 +245,7 @@ define void @no_pointer_deref(i16* %ptr) { define void @non_consecutive(i32* %ptr) { ; CHECK-LABEL: @non_consecutive(i32* %ptr) +; ATTRIBUTOR-LABEL: @non_consecutive(i32* nocapture nofree nonnull readonly dereferenceable(8) %ptr) %arrayidx1 = getelementptr i32, i32* %ptr, i64 1 %arrayidx0 = getelementptr i32, i32* %ptr, i64 0 %arrayidx3 = getelementptr i32, i32* %ptr, i64 3 @@ -246,6 +259,7 @@ define void @non_consecutive(i32* %ptr) { define void @more_bytes(i32* dereferenceable(8) %ptr) { ; CHECK-LABEL: @more_bytes(i32* dereferenceable(8) %ptr) +; ATTRIBUTOR-LABEL: @more_bytes(i32* nocapture nofree nonnull readonly dereferenceable(16) %ptr) 
%arrayidx3 = getelementptr i32, i32* %ptr, i64 3 %arrayidx1 = getelementptr i32, i32* %ptr, i64 1 %arrayidx0 = getelementptr i32, i32* %ptr, i64 0 @@ -261,6 +275,7 @@ define void @more_bytes(i32* dereferenceable(8) %ptr) { define void @more_bytes_and_not_null(i32* dereferenceable_or_null(8) %ptr) { ; CHECK-LABEL: @more_bytes_and_not_null(i32* dereferenceable_or_null(8) %ptr) +; ATTRIBUTOR-LABEL: @more_bytes_and_not_null(i32* nocapture nofree nonnull readonly dereferenceable(16) %ptr) %arrayidx3 = getelementptr i32, i32* %ptr, i64 3 %arrayidx1 = getelementptr i32, i32* %ptr, i64 1 %arrayidx0 = getelementptr i32, i32* %ptr, i64 0 @@ -276,6 +291,7 @@ define void @more_bytes_and_not_null(i32* dereferenceable_or_null(8) %ptr) { define void @better_bytes(i32* dereferenceable(100) %ptr) { ; CHECK-LABEL: @better_bytes(i32* dereferenceable(100) %ptr) +; ATTRIBUTOR-LABEL: @better_bytes(i32* nocapture nofree nonnull readonly dereferenceable(100) %ptr) %arrayidx3 = getelementptr i32, i32* %ptr, i64 3 %arrayidx1 = getelementptr i32, i32* %ptr, i64 1 %arrayidx0 = getelementptr i32, i32* %ptr, i64 0 @@ -289,6 +305,7 @@ define void @better_bytes(i32* dereferenceable(100) %ptr) { define void @bitcast(i32* %arg) { ; CHECK-LABEL: @bitcast(i32* %arg) +; ATTRIBUTOR-LABEL: @bitcast(i32* nocapture nofree nonnull readonly dereferenceable(8) %arg) %ptr = bitcast i32* %arg to float* %arrayidx0 = getelementptr float, float* %ptr, i64 0 %arrayidx1 = getelementptr float, float* %ptr, i64 1 @@ -299,6 +316,7 @@ define void @bitcast(i32* %arg) { define void @bitcast_different_sizes(double* %arg1, i8* %arg2) { ; CHECK-LABEL: @bitcast_different_sizes(double* %arg1, i8* %arg2) +; ATTRIBUTOR-LABEL: @bitcast_different_sizes(double* nocapture nofree nonnull readonly dereferenceable(12) %arg1, i8* nocapture nofree nonnull readonly dereferenceable(16) %arg2) %ptr1 = bitcast double* %arg1 to float* %a10 = getelementptr float, float* %ptr1, i64 0 %a11 = getelementptr float, float* %ptr1, i64 1 @@ -317,6 
+335,7 @@ define void @bitcast_different_sizes(double* %arg1, i8* %arg2) { define void @negative_offset(i32* %arg) { ; CHECK-LABEL: @negative_offset(i32* %arg) +; ATTRIBUTOR-LABEL: @negative_offset(i32* nocapture nofree nonnull readonly dereferenceable(4) %arg) %ptr = bitcast i32* %arg to float* %arrayidx0 = getelementptr float, float* %ptr, i64 0 %arrayidx1 = getelementptr float, float* %ptr, i64 -1 @@ -327,6 +346,7 @@ define void @negative_offset(i32* %arg) { define void @stores(i32* %arg) { ; CHECK-LABEL: @stores(i32* %arg) +; ATTRIBUTOR-LABEL: @stores(i32* nocapture nofree nonnull writeonly dereferenceable(8) %arg) %ptr = bitcast i32* %arg to float* %arrayidx0 = getelementptr float, float* %ptr, i64 0 %arrayidx1 = getelementptr float, float* %ptr, i64 1 @@ -337,6 +357,7 @@ define void @stores(i32* %arg) { define void @load_store(i32* %arg) { ; CHECK-LABEL: @load_store(i32* %arg) +; ATTRIBUTOR-LABEL: @load_store(i32* nocapture nofree nonnull dereferenceable(8) %arg) %ptr = bitcast i32* %arg to float* %arrayidx0 = getelementptr float, float* %ptr, i64 0 %arrayidx1 = getelementptr float, float* %ptr, i64 1 @@ -344,3 +365,21 @@ define void @load_store(i32* %arg) { store float 2.0, float* %arrayidx1 ret void } + +define void @different_size1(i32* %arg) { +; CHECK-LABEL: @different_size1(i32* %arg) +; ATTRIBUTOR-LABEL: @different_size1(i32* nocapture nofree nonnull writeonly dereferenceable(8) %arg) + %arg-cast = bitcast i32* %arg to double* + store double 0.000000e+00, double* %arg-cast + store i32 0, i32* %arg + ret void +} + +define void @different_size2(i32* %arg) { +; CHECK-LABEL: @different_size2(i32* %arg) +; ATTRIBUTOR-LABEL: @different_size2(i32* nocapture nofree nonnull writeonly dereferenceable(8) %arg) + store i32 0, i32* %arg + %arg-cast = bitcast i32* %arg to double* + store double 0.000000e+00, double* %arg-cast + ret void +} From 13cbcf1c1a4cbfecba30e21ccd86d688e1437d06 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Thu, 28 Nov 2019 16:27:01 +0300 
Subject: [PATCH 205/591] [yaml2obj] - Add a way to describe content of the SHT_GNU_verneed section with "Content". There is no way to set raw content for SHT_GNU_verneed section. This patch implements it. Differential revision: https://reviews.llvm.org/D70816 --- llvm/include/llvm/ObjectYAML/ELFYAML.h | 3 +- llvm/lib/ObjectYAML/ELFEmitter.cpp | 28 ++++--- llvm/lib/ObjectYAML/ELFYAML.cpp | 10 ++- .../tools/yaml2obj/ELF/verneed-section.yaml | 78 ++++++++++++++++++- llvm/tools/obj2yaml/elf2yaml.cpp | 4 +- 5 files changed, 109 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h index 9e45efc4a5fec..2f39877786ba7 100644 --- a/llvm/include/llvm/ObjectYAML/ELFYAML.h +++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h @@ -324,7 +324,8 @@ struct VerneedEntry { }; struct VerneedSection : Section { - std::vector VerneedV; + Optional Content; + Optional> VerneedV; llvm::yaml::Hex64 Info; VerneedSection() : Section(ChunkKind::Verneed) {} diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index 069e3c19523b6..37eeb01fb0998 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -1036,15 +1036,24 @@ void ELFState::writeSectionContent(Elf_Shdr &SHeader, typedef typename ELFT::Vernaux Elf_Vernaux; auto &OS = CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign); + SHeader.sh_info = Section.Info; + + if (Section.Content) { + SHeader.sh_size = writeContent(OS, Section.Content, None); + return; + } + + if (!Section.VerneedV) + return; uint64_t AuxCnt = 0; - for (size_t I = 0; I < Section.VerneedV.size(); ++I) { - const ELFYAML::VerneedEntry &VE = Section.VerneedV[I]; + for (size_t I = 0; I < Section.VerneedV->size(); ++I) { + const ELFYAML::VerneedEntry &VE = (*Section.VerneedV)[I]; Elf_Verneed VerNeed; VerNeed.vn_version = VE.Version; VerNeed.vn_file = DotDynstr.getOffset(VE.File); - if (I == Section.VerneedV.size() - 1) + if (I == 
Section.VerneedV->size() - 1) VerNeed.vn_next = 0; else VerNeed.vn_next = @@ -1069,9 +1078,8 @@ void ELFState::writeSectionContent(Elf_Shdr &SHeader, } } - SHeader.sh_size = Section.VerneedV.size() * sizeof(Elf_Verneed) + + SHeader.sh_size = Section.VerneedV->size() * sizeof(Elf_Verneed) + AuxCnt * sizeof(Elf_Vernaux); - SHeader.sh_info = Section.Info; } template @@ -1344,10 +1352,12 @@ template void ELFState::finalizeStrings() { // add strings to .dynstr section. for (const ELFYAML::Chunk *Sec : Doc.getSections()) { if (auto VerNeed = dyn_cast(Sec)) { - for (const ELFYAML::VerneedEntry &VE : VerNeed->VerneedV) { - DotDynstr.add(VE.File); - for (const ELFYAML::VernauxEntry &Aux : VE.AuxV) - DotDynstr.add(Aux.Name); + if (VerNeed->VerneedV) { + for (const ELFYAML::VerneedEntry &VE : *VerNeed->VerneedV) { + DotDynstr.add(VE.File); + for (const ELFYAML::VernauxEntry &Aux : VE.AuxV) + DotDynstr.add(Aux.Name); + } } } else if (auto VerDef = dyn_cast(Sec)) { if (VerDef->Entries) diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index ebda4cca97c83..2a9d51486f6a4 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -1086,7 +1086,8 @@ static void sectionMapping(IO &IO, ELFYAML::SymverSection &Section) { static void sectionMapping(IO &IO, ELFYAML::VerneedSection &Section) { commonSectionMapping(IO, Section); IO.mapRequired("Info", Section.Info); - IO.mapRequired("Dependencies", Section.VerneedV); + IO.mapOptional("Dependencies", Section.VerneedV); + IO.mapOptional("Content", Section.Content); } static void sectionMapping(IO &IO, ELFYAML::RelocationSection &Section) { @@ -1427,6 +1428,13 @@ StringRef MappingTraits>::validate( return {}; } + if (const auto *VD = dyn_cast(C.get())) { + if (VD->VerneedV && VD->Content) + return "SHT_GNU_verneed: \"Dependencies\" and \"Content\" can't be used " + "together"; + return {}; + } + return {}; } diff --git a/llvm/test/tools/yaml2obj/ELF/verneed-section.yaml 
b/llvm/test/tools/yaml2obj/ELF/verneed-section.yaml index 1a1dc34985a27..59e4a0e5f8d85 100644 --- a/llvm/test/tools/yaml2obj/ELF/verneed-section.yaml +++ b/llvm/test/tools/yaml2obj/ELF/verneed-section.yaml @@ -1,7 +1,7 @@ ## Check we are able to handle SHT_GNU_verneed sections. -# RUN: yaml2obj %s -o %t -# RUN: llvm-readobj -V %t | FileCheck %s +# RUN: yaml2obj --docnum=1 %s -o %t1 +# RUN: llvm-readobj -V %t1 | FileCheck %s # CHECK: VersionRequirements [ # CHECK-NEXT: Dependency { @@ -82,3 +82,77 @@ Sections: DynamicSymbols: - Name: f1 Binding: STB_GLOBAL + +## Check we can use "Content" to describe the content. + +# RUN: yaml2obj --docnum=2 %s -o %t2 +# RUN: llvm-readobj --sections --section-data %t2 | FileCheck %s --check-prefix=CONTENT + +# CONTENT: Name: .gnu.version_r +# CONTENT-NEXT: Type: SHT_GNU_verneed +# CONTENT-NEXT: Flags [ (0x2) +# CONTENT-NEXT: SHF_ALLOC (0x2) +# CONTENT-NEXT: ] +# CONTENT-NEXT: Address: 0x0 +# CONTENT-NEXT: Offset: 0x40 +# CONTENT-NEXT: Size: 3 +# CONTENT-NEXT: Link: 0 +# CONTENT-NEXT: Info: 1 +# CONTENT-NEXT: AddressAlignment: 0 +# CONTENT-NEXT: EntrySize: 0 +# CONTENT-NEXT: SectionData ( +# CONTENT-NEXT: 0000: 112233 +# CONTENT-NEXT: ) + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_r + Type: SHT_GNU_verneed + Flags: [ SHF_ALLOC ] + Info: 0x1 + Content: "112233" + +## Check we can omit "Content" and "Dependencies" fields to produce an empty SHT_GNU_verneed section. 
+ +# RUN: yaml2obj --docnum=3 %s -o %t3 +# RUN: llvm-readelf --sections %t3 | FileCheck %s --check-prefix=NO-PROPS + +# NO-PROPS: [Nr] Name Type Address Off Size +# NO-PROPS: [ 1] .gnu.version_r VERNEED 0000000000000000 000040 000000 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_r + Type: SHT_GNU_verneed + Flags: [ SHF_ALLOC ] + Info: 0x0 + +## Check we can't use both "Dependencies" and "Content" together. + +# RUN: not yaml2obj --docnum=4 %s 2>&1 | FileCheck %s --check-prefix=BOTH + +# BOTH: error: SHT_GNU_verneed: "Dependencies" and "Content" can't be used together + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_r + Type: SHT_GNU_verneed + Flags: [ SHF_ALLOC ] + Info: 0x0 + Content: "" + Dependencies: [] diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp index 77d28d85e6a11..4672e4f9746f6 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -993,6 +993,8 @@ ELFDumper::dumpVerneedSection(const Elf_Shdr *Shdr) { if (!StringTableOrErr) return StringTableOrErr.takeError(); + S->VerneedV.emplace(); + llvm::ArrayRef Data = *Contents; const uint8_t *Buf = Data.data(); while (Buf) { @@ -1019,7 +1021,7 @@ ELFDumper::dumpVerneedSection(const Elf_Shdr *Shdr) { BufAux = Vernaux->vna_next ? BufAux + Vernaux->vna_next : nullptr; } - S->VerneedV.push_back(Entry); + S->VerneedV->push_back(Entry); Buf = Verneed->vn_next ? 
Buf + Verneed->vn_next : nullptr; } From 06e5ebf8dbd9c19c2ef58e6eee6346de8688bc5b Mon Sep 17 00:00:00 2001 From: David Zarzycki Date: Fri, 29 Nov 2019 09:22:56 +0200 Subject: [PATCH 206/591] [libcxx] Add -Wno-deprecated-copy to the test config --- libcxx/utils/libcxx/test/config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py index 07657ea6e4f8f..befe75c20e76b 100644 --- a/libcxx/utils/libcxx/test/config.py +++ b/libcxx/utils/libcxx/test/config.py @@ -915,6 +915,7 @@ def configure_warnings(self): self.cxx.addWarningFlagIfSupported('-Wshadow') self.cxx.addWarningFlagIfSupported('-Wno-unused-command-line-argument') self.cxx.addWarningFlagIfSupported('-Wno-attributes') + self.cxx.addWarningFlagIfSupported('-Wno-deprecated-copy') self.cxx.addWarningFlagIfSupported('-Wno-constant-evaluated') self.cxx.addWarningFlagIfSupported('-Wno-pessimizing-move') self.cxx.addWarningFlagIfSupported('-Wno-c++11-extensions') From 7ab14813619189d13382da047409a81c94ebc58d Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Thu, 28 Nov 2019 13:20:25 +0300 Subject: [PATCH 207/591] [llvm-readelf/llvm-readobj] - Check version of SHT_GNU_verdef section entries when dumping. Elfxx_Verdef contains the following field: vd_version Version revision. This field shall be set to 1. (https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/symversion.html) Our code should check the struct version for correctness. This patch does that. (This will help to simplify or eliminate ELFDumper::LoadVersionDefs() which has it's own logic to parse version definitions for no reason. It checks the struct version currently). 
Differential revision: https://reviews.llvm.org/D70810 --- .../llvm-readobj/elf-verdef-invalid.test | 42 +++++++++++++++---- llvm/tools/llvm-readobj/ELFDumper.cpp | 6 +++ 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/llvm/test/tools/llvm-readobj/elf-verdef-invalid.test b/llvm/test/tools/llvm-readobj/elf-verdef-invalid.test index 3a4de36983857..44be4f2fdac40 100644 --- a/llvm/test/tools/llvm-readobj/elf-verdef-invalid.test +++ b/llvm/test/tools/llvm-readobj/elf-verdef-invalid.test @@ -91,7 +91,7 @@ Sections: Link: .dynstr Info: 0x1 Entries: - - Version: 0 + - Version: 1 Flags: 0 VersionNdx: 0 Hash: 0 @@ -122,7 +122,7 @@ Sections: Link: .dynstr Info: 0x1 Entries: - - Version: 0 + - Version: 1 Flags: 0 VersionNdx: 0 Hash: 0 @@ -141,7 +141,7 @@ DynamicSymbols: # PAST-STRTAB-END-LLVM: VersionDefinitions [ # PAST-STRTAB-END-LLVM-NEXT: Definition { -# PAST-STRTAB-END-LLVM-NEXT: Version: 0 +# PAST-STRTAB-END-LLVM-NEXT: Version: 1 # PAST-STRTAB-END-LLVM-NEXT: Flags [ (0x0) # PAST-STRTAB-END-LLVM-NEXT: ] # PAST-STRTAB-END-LLVM-NEXT: Index: 0 @@ -153,7 +153,7 @@ DynamicSymbols: # PAST-STRTAB-END-GNU: Version definition section '.gnu.version_d' contains 1 entries: # PAST-STRTAB-END-GNU-NEXT: Addr: 0000000000000000 Offset: 0x000040 Link: 2 (.strtab) -# PAST-STRTAB-END-GNU-NEXT: 0x0000: Rev: 0 Flags: none Index: 0 Cnt: 1 Name: +# PAST-STRTAB-END-GNU-NEXT: 0x0000: Rev: 1 Flags: none Index: 0 Cnt: 1 Name: --- !ELF FileHeader: @@ -167,7 +167,7 @@ Sections: Link: .strtab Info: 0x1 Entries: - - Version: 0 + - Version: 1 Flags: 0 VersionNdx: 0 Hash: 0 @@ -200,7 +200,7 @@ Sections: Link: .dynstr Info: 0x1 Entries: - - Version: 0 + - Version: 1 Flags: 0 VersionNdx: 0 Hash: 0 @@ -230,7 +230,35 @@ Sections: Link: .dynstr Info: 0x1 ## The byte offset to the auxiliary entry is 0x13, i.e. it is not correctly aligned in memory. 
- Content: "0000000000000100000000001300000000000000" + Content: "0100000000000100000000001300000000000000" DynamicSymbols: - Name: foo Binding: STB_GLOBAL + +## Check how we handle the case when a version definition entry has an unsupported version. + +# RUN: yaml2obj %s --docnum=9 -o %t9 +# RUN: llvm-readobj -V %t9 2>&1 | FileCheck %s --check-prefix=UNSUPPORTED-VERSION -DFILE=%t9 +# RUN: llvm-readelf -V %t9 2>&1 | FileCheck %s --check-prefix=UNSUPPORTED-VERSION -DFILE=%t9 + +# UNSUPPORTED-VERSION: warning: '[[FILE]]': unable to dump SHT_GNU_verdef section with index 1: version 65278 is not yet supported + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Link: .dynstr + Info: 0x1 + Entries: + - Version: 0xfefe + Flags: 0 + VersionNdx: 0 + Hash: 0 + Names: [] +DynamicSymbols: + - Name: foo diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 79d08d379a1ad..51ea599f4be74 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -409,6 +409,12 @@ ELFDumper::getVersionDefinitions(const Elf_Shdr *Sec) const { ": found a misaligned version definition entry at offset 0x" + Twine::utohexstr(VerdefBuf - Start)); + unsigned Version = *reinterpret_cast(VerdefBuf); + if (Version != 1) + return createError("unable to dump SHT_GNU_verdef section with index " + + Twine(SecNdx) + ": version " + Twine(Version) + + " is not yet supported"); + const Elf_Verdef *D = reinterpret_cast(VerdefBuf); VerDef &VD = *Ret.emplace(Ret.end()); VD.Offset = VerdefBuf - Start; From 99adf047c8d963a2c18b95c0f38691d407e91b99 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Fri, 29 Nov 2019 11:38:27 +0300 Subject: [PATCH 208/591] [llvm-readelf][test] - Update comment in elf-verdef-invalid.test. NFC. It was suggested to change it during review of D70810, but I've forgotten to update it before commit. 
--- llvm/test/tools/llvm-readobj/elf-verdef-invalid.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/tools/llvm-readobj/elf-verdef-invalid.test b/llvm/test/tools/llvm-readobj/elf-verdef-invalid.test index 44be4f2fdac40..65e88119d0e56 100644 --- a/llvm/test/tools/llvm-readobj/elf-verdef-invalid.test +++ b/llvm/test/tools/llvm-readobj/elf-verdef-invalid.test @@ -235,7 +235,7 @@ DynamicSymbols: - Name: foo Binding: STB_GLOBAL -## Check how we handle the case when a version definition entry has an unsupported version. +## Check how we handle a version definition entry with an unsupported version. # RUN: yaml2obj %s --docnum=9 -o %t9 # RUN: llvm-readobj -V %t9 2>&1 | FileCheck %s --check-prefix=UNSUPPORTED-VERSION -DFILE=%t9 From def65bb4f5bc87588fa6f849225397c21dcefb2b Mon Sep 17 00:00:00 2001 From: Ilya Biryukov Date: Fri, 29 Nov 2019 09:44:25 +0100 Subject: [PATCH 209/591] [Syntax] Remove unused parameter from `TreeBuilder::markChildToken`. NFC --- clang/lib/Tooling/Syntax/BuildTree.cpp | 35 +++++++++++--------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index dddc265c8c416..b36b8e00f7c1c 100644 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -66,7 +66,7 @@ class syntax::TreeBuilder { void markExprChild(Expr *Child, NodeRole Role); /// Set role for a token starting at \p Loc. - void markChildToken(SourceLocation Loc, tok::TokenKind Kind, NodeRole R); + void markChildToken(SourceLocation Loc, NodeRole R); /// Finish building the tree and consume the root node. 
syntax::TranslationUnit *finalize() && { @@ -255,11 +255,10 @@ class BuildTreeVisitor : public RecursiveASTVisitor { bool WalkUpFromCompoundStmt(CompoundStmt *S) { using NodeRole = syntax::NodeRole; - Builder.markChildToken(S->getLBracLoc(), tok::l_brace, NodeRole::OpenParen); + Builder.markChildToken(S->getLBracLoc(), NodeRole::OpenParen); for (auto *Child : S->body()) Builder.markStmtChild(Child, NodeRole::CompoundStatement_statement); - Builder.markChildToken(S->getRBracLoc(), tok::r_brace, - NodeRole::CloseParen); + Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen); Builder.foldNode(Builder.getStmtRange(S), new (allocator()) syntax::CompoundStatement); @@ -323,7 +322,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor { } bool WalkUpFromSwitchStmt(SwitchStmt *S) { - Builder.markChildToken(S->getSwitchLoc(), tok::kw_switch, + Builder.markChildToken(S->getSwitchLoc(), syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), @@ -332,7 +331,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor { } bool WalkUpFromCaseStmt(CaseStmt *S) { - Builder.markChildToken(S->getKeywordLoc(), tok::kw_case, + Builder.markChildToken(S->getKeywordLoc(), syntax::NodeRole::IntroducerKeyword); Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value); Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); @@ -342,7 +341,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor { } bool WalkUpFromDefaultStmt(DefaultStmt *S) { - Builder.markChildToken(S->getKeywordLoc(), tok::kw_default, + Builder.markChildToken(S->getKeywordLoc(), syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), @@ -351,11 +350,10 @@ class BuildTreeVisitor : public RecursiveASTVisitor { } bool WalkUpFromIfStmt(IfStmt *S) { - Builder.markChildToken(S->getIfLoc(), 
tok::kw_if, - syntax::NodeRole::IntroducerKeyword); + Builder.markChildToken(S->getIfLoc(), syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getThen(), syntax::NodeRole::IfStatement_thenStatement); - Builder.markChildToken(S->getElseLoc(), tok::kw_else, + Builder.markChildToken(S->getElseLoc(), syntax::NodeRole::IfStatement_elseKeyword); Builder.markStmtChild(S->getElse(), syntax::NodeRole::IfStatement_elseStatement); @@ -365,8 +363,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor { } bool WalkUpFromForStmt(ForStmt *S) { - Builder.markChildToken(S->getForLoc(), tok::kw_for, - syntax::NodeRole::IntroducerKeyword); + Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), new (allocator()) syntax::ForStatement); @@ -374,7 +371,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor { } bool WalkUpFromWhileStmt(WhileStmt *S) { - Builder.markChildToken(S->getWhileLoc(), tok::kw_while, + Builder.markChildToken(S->getWhileLoc(), syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), @@ -383,7 +380,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor { } bool WalkUpFromContinueStmt(ContinueStmt *S) { - Builder.markChildToken(S->getContinueLoc(), tok::kw_continue, + Builder.markChildToken(S->getContinueLoc(), syntax::NodeRole::IntroducerKeyword); Builder.foldNode(Builder.getStmtRange(S), new (allocator()) syntax::ContinueStatement); @@ -391,7 +388,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor { } bool WalkUpFromBreakStmt(BreakStmt *S) { - Builder.markChildToken(S->getBreakLoc(), tok::kw_break, + Builder.markChildToken(S->getBreakLoc(), syntax::NodeRole::IntroducerKeyword); Builder.foldNode(Builder.getStmtRange(S), new (allocator()) syntax::BreakStatement); @@ -399,7 +396,7 @@ class BuildTreeVisitor : public 
RecursiveASTVisitor { } bool WalkUpFromReturnStmt(ReturnStmt *S) { - Builder.markChildToken(S->getReturnLoc(), tok::kw_return, + Builder.markChildToken(S->getReturnLoc(), syntax::NodeRole::IntroducerKeyword); Builder.markExprChild(S->getRetValue(), syntax::NodeRole::ReturnStatement_value); @@ -409,8 +406,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor { } bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) { - Builder.markChildToken(S->getForLoc(), tok::kw_for, - syntax::NodeRole::IntroducerKeyword); + Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); Builder.foldNode(Builder.getStmtRange(S), new (allocator()) syntax::RangeBasedForStatement); @@ -431,8 +427,7 @@ void syntax::TreeBuilder::foldNode(llvm::ArrayRef Range, Pending.foldChildren(Range, New); } -void syntax::TreeBuilder::markChildToken(SourceLocation Loc, - tok::TokenKind Kind, NodeRole Role) { +void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) { if (Loc.isInvalid()) return; Pending.assignRole(*findToken(Loc), Role); From 302cb3bc3d7220e09f5dced64ddfdda33b9c49f9 Mon Sep 17 00:00:00 2001 From: Ilya Biryukov Date: Fri, 29 Nov 2019 09:48:00 +0100 Subject: [PATCH 210/591] [Syntax] Add a comment explaining the pointer keys in std::map. NFC --- clang/lib/Tooling/Syntax/BuildTree.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index b36b8e00f7c1c..22cdb89b7bfbb 100644 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -209,6 +209,8 @@ class syntax::TreeBuilder { }; /// Maps from the start token to a subtree starting at that token. + /// Keys in the map are pointers into the array of expanded tokens, so + /// pointer order corresponds to the order of preprocessor tokens. /// FIXME: storing the end tokens is redundant. 
/// FIXME: the key of a map is redundant, it is also stored in NodeForRange. std::map Trees; From 66ab932fcc10f4833cf16875d1e3dbcb81d9c39d Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Thu, 28 Nov 2019 16:48:49 +0100 Subject: [PATCH 211/591] [clangd] Correct the file path in Edit::replacements when generating the rename edit. Summary: The file path was set to the file content previously, and it isn't covered by normal clangd & unittest code path (as we only use the offset, length, replacement text). Reviewers: ilya-biryukov Subscribers: MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D70828 --- clang-tools-extra/clangd/refactor/Rename.cpp | 10 ++++++---- clang-tools-extra/clangd/refactor/Rename.h | 3 ++- clang-tools-extra/clangd/unittests/RenameTests.cpp | 8 +++++--- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/clang-tools-extra/clangd/refactor/Rename.cpp b/clang-tools-extra/clangd/refactor/Rename.cpp index e57bf61dc2e5c..6a3439cc06127 100644 --- a/clang-tools-extra/clangd/refactor/Rename.cpp +++ b/clang-tools-extra/clangd/refactor/Rename.cpp @@ -349,8 +349,9 @@ llvm::Expected renameOutsideFile( elog("Fail to read file content: {0}", AffectedFileCode.takeError()); continue; } - auto RenameEdit = buildRenameEdit( - *AffectedFileCode, std::move(FileAndOccurrences.second), NewName); + auto RenameEdit = + buildRenameEdit(FilePath, *AffectedFileCode, + std::move(FileAndOccurrences.second), NewName); if (!RenameEdit) { return llvm::make_error( llvm::formatv("fail to build rename edit for file {0}: {1}", FilePath, @@ -451,7 +452,8 @@ llvm::Expected rename(const RenameInputs &RInputs) { return Results; } -llvm::Expected buildRenameEdit(llvm::StringRef InitialCode, +llvm::Expected buildRenameEdit(llvm::StringRef AbsFilePath, + llvm::StringRef InitialCode, std::vector Occurrences, llvm::StringRef NewName) { llvm::sort(Occurrences); @@ -491,7 +493,7 @@ llvm::Expected
buildRenameEdit(llvm::StringRef InitialCode, for (const auto &R : OccurrencesOffsets) { auto ByteLength = R.second - R.first; if (auto Err = RenameEdit.add( - tooling::Replacement(InitialCode, R.first, ByteLength, NewName))) + tooling::Replacement(AbsFilePath, R.first, ByteLength, NewName))) return std::move(Err); } return Edit(InitialCode, std::move(RenameEdit)); diff --git a/clang-tools-extra/clangd/refactor/Rename.h b/clang-tools-extra/clangd/refactor/Rename.h index c8cfc6d058923..6f38c14a3e2a8 100644 --- a/clang-tools-extra/clangd/refactor/Rename.h +++ b/clang-tools-extra/clangd/refactor/Rename.h @@ -50,7 +50,8 @@ llvm::Expected rename(const RenameInputs &RInputs); /// Generates rename edits that replaces all given occurrences with the /// NewName. /// Exposed for testing only. -llvm::Expected buildRenameEdit(llvm::StringRef InitialCode, +llvm::Expected buildRenameEdit(llvm::StringRef AbsFilePath, + llvm::StringRef InitialCode, std::vector Occurrences, llvm::StringRef NewName); diff --git a/clang-tools-extra/clangd/unittests/RenameTests.cpp b/clang-tools-extra/clangd/unittests/RenameTests.cpp index 89efb32a2bb53..0615272de372c 100644 --- a/clang-tools-extra/clangd/unittests/RenameTests.cpp +++ b/clang-tools-extra/clangd/unittests/RenameTests.cpp @@ -669,14 +669,16 @@ TEST(CrossFileRenameTests, CrossFileOnLocalSymbol) { TEST(CrossFileRenameTests, BuildRenameEdits) { Annotations Code("[[😂]]"); auto LSPRange = Code.range(); - auto Edit = buildRenameEdit(Code.code(), {LSPRange}, "abc"); + llvm::StringRef FilePath = "/test/TestTU.cpp"; + auto Edit = buildRenameEdit(FilePath, Code.code(), {LSPRange}, "abc"); ASSERT_TRUE(bool(Edit)) << Edit.takeError(); ASSERT_EQ(1UL, Edit->Replacements.size()); + EXPECT_EQ(FilePath, Edit->Replacements.begin()->getFilePath()); EXPECT_EQ(4UL, Edit->Replacements.begin()->getLength()); // Test invalid range. 
LSPRange.end = {10, 0}; // out of range - Edit = buildRenameEdit(Code.code(), {LSPRange}, "abc"); + Edit = buildRenameEdit(FilePath, Code.code(), {LSPRange}, "abc"); EXPECT_FALSE(Edit); EXPECT_THAT(llvm::toString(Edit.takeError()), testing::HasSubstr("fail to convert")); @@ -687,7 +689,7 @@ TEST(CrossFileRenameTests, BuildRenameEdits) { [[range]] [[range]] )cpp"); - Edit = buildRenameEdit(T.code(), T.ranges(), "abc"); + Edit = buildRenameEdit(FilePath, T.code(), T.ranges(), "abc"); ASSERT_TRUE(bool(Edit)) << Edit.takeError(); EXPECT_EQ(applyEdits(FileEdits{{T.code(), std::move(*Edit)}}).front().second, expectedResult(Code, expectedResult(T, "abc"))); From 407ac2eb5f136af5ddd213b8bcca176481ec5198 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Thu, 28 Nov 2019 19:22:50 +0100 Subject: [PATCH 212/591] [clangd] Log cc1 args at verbose level. Summary: This will help debugging driver issues. Reviewers: kbobyrev Subscribers: ilya-biryukov, javed.absar, MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D70832 --- clang-tools-extra/clangd/Compiler.cpp | 5 +++-- clang-tools-extra/clangd/Compiler.h | 4 ++-- clang-tools-extra/clangd/TUScheduler.cpp | 4 ++++ clang/include/clang/Frontend/Utils.h | 8 ++++++-- clang/lib/Frontend/CreateInvocationFromCommandLine.cpp | 5 ++++- 5 files changed, 19 insertions(+), 7 deletions(-) diff --git a/clang-tools-extra/clangd/Compiler.cpp b/clang-tools-extra/clangd/Compiler.cpp index 795fd0082594d..eae753b5c9b36 100644 --- a/clang-tools-extra/clangd/Compiler.cpp +++ b/clang-tools-extra/clangd/Compiler.cpp @@ -42,7 +42,8 @@ void IgnoreDiagnostics::HandleDiagnostic(DiagnosticsEngine::Level DiagLevel, std::unique_ptr buildCompilerInvocation(const ParseInputs &Inputs, - clang::DiagnosticConsumer &D) { + clang::DiagnosticConsumer &D, + std::vector *CC1Args) { std::vector ArgStrs; for (const auto &S : Inputs.CompileCommand.CommandLine) ArgStrs.push_back(S.c_str()); @@ -57,7 +58,7 
@@ buildCompilerInvocation(const ParseInputs &Inputs, CompilerInstance::createDiagnostics(new DiagnosticOptions, &D, false); std::unique_ptr CI = createInvocationFromCommandLine( ArgStrs, CommandLineDiagsEngine, Inputs.FS, - /*ShouldRecoverOnErrors=*/true); + /*ShouldRecoverOnErrors=*/true, CC1Args); if (!CI) return nullptr; // createInvocationFromCommandLine sets DisableFree. diff --git a/clang-tools-extra/clangd/Compiler.h b/clang-tools-extra/clangd/Compiler.h index 6ab1b0f075f93..51414c37fc042 100644 --- a/clang-tools-extra/clangd/Compiler.h +++ b/clang-tools-extra/clangd/Compiler.h @@ -52,8 +52,8 @@ struct ParseInputs { /// Builds compiler invocation that could be used to build AST or preamble. std::unique_ptr -buildCompilerInvocation(const ParseInputs &Inputs, - clang::DiagnosticConsumer &D); +buildCompilerInvocation(const ParseInputs &Inputs, clang::DiagnosticConsumer &D, + std::vector *CC1Args = nullptr); /// Creates a compiler instance, configured so that: /// - Contents of the parsed file are remapped to \p MainFile. diff --git a/clang-tools-extra/clangd/TUScheduler.cpp b/clang-tools-extra/clangd/TUScheduler.cpp index d740c38736957..b51221d7d9033 100644 --- a/clang-tools-extra/clangd/TUScheduler.cpp +++ b/clang-tools-extra/clangd/TUScheduler.cpp @@ -407,8 +407,12 @@ void ASTWorker::update(ParseInputs Inputs, WantDiagnostics WantDiags) { llvm::join(Inputs.CompileCommand.CommandLine, " ")); // Rebuild the preamble and the AST. StoreDiags CompilerInvocationDiagConsumer; + std::vector CC1Args; std::unique_ptr Invocation = buildCompilerInvocation(Inputs, CompilerInvocationDiagConsumer); + // Log cc1 args even (especially!) if creating invocation failed. 
+ if (!CC1Args.empty()) + vlog("cc1 args: {0}", llvm::join(CC1Args, " ")); std::vector CompilerInvocationDiags = CompilerInvocationDiagConsumer.take(); if (!Invocation) { diff --git a/clang/include/clang/Frontend/Utils.h b/clang/include/clang/Frontend/Utils.h index 0f9b17ee50893..2b142122cb66d 100644 --- a/clang/include/clang/Frontend/Utils.h +++ b/clang/include/clang/Frontend/Utils.h @@ -217,14 +217,18 @@ createChainedIncludesSource(CompilerInstance &CI, /// non-null (and possibly incorrect) CompilerInvocation if any errors were /// encountered. When this flag is false, always return null on errors. /// -/// \return A CompilerInvocation, or 0 if none was built for the given +/// \param CC1Args - if non-null, will be populated with the args to cc1 +/// expanded from \p Args. May be set even if nullptr is returned. +/// +/// \return A CompilerInvocation, or nullptr if none was built for the given /// argument vector. std::unique_ptr createInvocationFromCommandLine( ArrayRef Args, IntrusiveRefCntPtr Diags = IntrusiveRefCntPtr(), IntrusiveRefCntPtr VFS = nullptr, - bool ShouldRecoverOnErrors = false); + bool ShouldRecoverOnErrors = false, + std::vector *CC1Args = nullptr); /// Return the value of the last argument as an integer, or a default. If Diags /// is non-null, emits an error if the argument is given, but non-integral. 
diff --git a/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp b/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp index ab62b633cda38..18c4814bbd5cc 100644 --- a/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp +++ b/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp @@ -26,7 +26,8 @@ using namespace llvm::opt; std::unique_ptr clang::createInvocationFromCommandLine( ArrayRef ArgList, IntrusiveRefCntPtr Diags, - IntrusiveRefCntPtr VFS, bool ShouldRecoverOnErorrs) { + IntrusiveRefCntPtr VFS, bool ShouldRecoverOnErorrs, + std::vector *CC1Args) { if (!Diags.get()) { // No diagnostics engine was provided, so create our own diagnostics object // with the default options. @@ -89,6 +90,8 @@ std::unique_ptr clang::createInvocationFromCommandLine( } const ArgStringList &CCArgs = Cmd.getArguments(); + if (CC1Args) + *CC1Args = {CCArgs.begin(), CCArgs.end()}; auto CI = std::make_unique(); if (!CompilerInvocation::CreateFromArgs(*CI, CCArgs, *Diags) && !ShouldRecoverOnErorrs) From 4b24ab181aef58c6e2001e630331385648db3c08 Mon Sep 17 00:00:00 2001 From: Ilya Biryukov Date: Fri, 29 Nov 2019 11:10:01 +0100 Subject: [PATCH 213/591] [AST] Remove unused and undefined `TypeLoc::IgnoreMacroDefinitions` function. NFC Looks like an accidental leftover from the older version of the code. --- clang/include/clang/AST/TypeLoc.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/clang/include/clang/AST/TypeLoc.h b/clang/include/clang/AST/TypeLoc.h index f305680d775cf..7f1d429ac3b42 100644 --- a/clang/include/clang/AST/TypeLoc.h +++ b/clang/include/clang/AST/TypeLoc.h @@ -173,9 +173,6 @@ class TypeLoc { TypeLoc IgnoreParens() const; - /// Strips MacroDefinitionTypeLocs from a type location. - TypeLoc IgnoreMacroDefinitions() const; - /// Find a type with the location of an explicit type qualifier. 
/// /// The result, if non-null, will be one of: From a48b5e24747ca83f9f18dff62a4bacb2f7dfd773 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Fri, 29 Nov 2019 11:34:18 +0100 Subject: [PATCH 214/591] [lldb][NFC] Fix header guard comment in ThreadSafeDenseMap.h --- lldb/include/lldb/Core/ThreadSafeDenseMap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/include/lldb/Core/ThreadSafeDenseMap.h b/lldb/include/lldb/Core/ThreadSafeDenseMap.h index c485b91acb47a..420cb57635865 100644 --- a/lldb/include/lldb/Core/ThreadSafeDenseMap.h +++ b/lldb/include/lldb/Core/ThreadSafeDenseMap.h @@ -62,4 +62,4 @@ class ThreadSafeDenseMap { } // namespace lldb_private -#endif // liblldb_ThreadSafeSTLMap_h_ +#endif // liblldb_ThreadSafeDenseMap_h_ From e702bdb8598fcb4224f465569e7692a155c3eb3e Mon Sep 17 00:00:00 2001 From: Ilya Biryukov Date: Fri, 29 Nov 2019 11:32:58 +0100 Subject: [PATCH 215/591] [Syntax] Build SimpleDeclaration node that groups multiple declarators Summary: Also remove the temporary TopLevelDeclaration node and add UnknownDeclaration to represent other unknown nodes. See the follow-up change for building more top-level declarations. Adding declarators is also pretty involved and will be done in another follow-up patch. 
Reviewers: gribozavr2 Reviewed By: gribozavr2 Subscribers: merge_guards_bot, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D70787 --- clang/include/clang/Tooling/Syntax/Nodes.h | 53 ++++-- clang/lib/Tooling/Syntax/BuildTree.cpp | 171 ++++++++++++++++---- clang/lib/Tooling/Syntax/Nodes.cpp | 6 +- clang/unittests/Tooling/Syntax/TreeTest.cpp | 118 +++++++++++--- 4 files changed, 270 insertions(+), 78 deletions(-) diff --git a/clang/include/clang/Tooling/Syntax/Nodes.h b/clang/include/clang/Tooling/Syntax/Nodes.h index c40b6bd24817f..c4db4da892c2d 100644 --- a/clang/include/clang/Tooling/Syntax/Nodes.h +++ b/clang/include/clang/Tooling/Syntax/Nodes.h @@ -37,7 +37,6 @@ namespace syntax { enum class NodeKind : uint16_t { Leaf, TranslationUnit, - TopLevelDeclaration, // Expressions UnknownExpression, @@ -57,7 +56,11 @@ enum class NodeKind : uint16_t { ReturnStatement, RangeBasedForStatement, ExpressionStatement, - CompoundStatement + CompoundStatement, + + // Declarations + UnknownDeclaration, + SimpleDeclaration, }; /// For debugging purposes. llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, NodeKind K); @@ -102,20 +105,6 @@ class TranslationUnit final : public Tree { } }; -/// FIXME: this node is temporary and will be replaced with nodes for various -/// 'declarations' and 'declarators' from the C/C++ grammar -/// -/// Represents any top-level declaration. Only there to give the syntax tree a -/// bit of structure until we implement syntax nodes for declarations and -/// declarators. -class TopLevelDeclaration final : public Tree { -public: - TopLevelDeclaration() : Tree(NodeKind::TopLevelDeclaration) {} - static bool classof(const Node *N) { - return N->kind() == NodeKind::TopLevelDeclaration; - } -}; - /// A base class for all expressions. Note that expressions are not statements, /// even though they are in clang. 
class Expression : public Tree { @@ -313,6 +302,38 @@ class CompoundStatement final : public Statement { syntax::Leaf *rbrace(); }; +/// A declaration that can appear at the top-level. Note that this does *not* +/// correspond 1-to-1 to clang::Decl. Syntax trees distinguish between top-level +/// declarations (e.g. namespace definitions) and declarators (e.g. variables, +/// typedefs, etc.). Declarators are stored inside SimpleDeclaration. +class Declaration : public Tree { +public: + Declaration(NodeKind K) : Tree(K) {} + static bool classof(const Node *N) { + return NodeKind::UnknownDeclaration <= N->kind() && + N->kind() <= NodeKind::SimpleDeclaration; + } +}; + +/// Declaration of an unknown kind, e.g. not yet supported in syntax trees. +class UnknownDeclaration final : public Declaration { +public: + UnknownDeclaration() : Declaration(NodeKind::UnknownDeclaration) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::UnknownDeclaration; + } +}; + +/// Groups multiple declarators (e.g. variables, typedefs, etc.) together. All +/// grouped declarators share the same declaration specifiers (e.g. 'int' or +/// 'typedef'). 
+class SimpleDeclaration final : public Declaration { +public: + SimpleDeclaration() : Declaration(NodeKind::SimpleDeclaration) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::SimpleDeclaration; + } +}; } // namespace syntax } // namespace clang #endif diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index 22cdb89b7bfbb..67081497d04c0 100644 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// #include "clang/Tooling/Syntax/BuildTree.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclBase.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" #include "clang/Basic/LLVM.h" @@ -56,6 +58,14 @@ class syntax::TreeBuilder { /// Range. void foldNode(llvm::ArrayRef Range, syntax::Tree *New); + /// Must be called with the range of each `DeclaratorDecl`. Ensures the + /// corresponding declarator nodes are covered by `SimpleDeclaration`. + void noticeDeclaratorRange(llvm::ArrayRef Range); + + /// Notifies that we should not consume trailing semicolon when computing + /// token range of \p D. + void noticeDeclaratorWithoutSemicolon(Decl *D); + /// Mark the \p Child node with a corresponding \p Role. All marked children /// should be consumed by foldNode. /// (!) when called on expressions (clang::Expr is derived from clang::Stmt), @@ -94,7 +104,14 @@ class syntax::TreeBuilder { return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); } llvm::ArrayRef getRange(const Decl *D) const { - return getRange(D->getBeginLoc(), D->getEndLoc()); + auto Tokens = getRange(D->getBeginLoc(), D->getEndLoc()); + if (llvm::isa(D)) + return Tokens; + if (DeclsWithoutSemicolons.count(D)) + return Tokens; + // FIXME: do not consume trailing semicolon on function definitions. + // Most declarations own a semicolon in syntax trees, but not in clang AST. 
+ return withTrailingSemicolon(Tokens); } llvm::ArrayRef getExprRange(const Expr *E) const { return getRange(E->getBeginLoc(), E->getEndLoc()); @@ -108,14 +125,22 @@ class syntax::TreeBuilder { // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and // all statements that end with those. Consume this semicolon here. - // - // (!) statements never consume 'eof', so looking at the next token is ok. + if (Tokens.back().kind() == tok::semi) + return Tokens; + return withTrailingSemicolon(Tokens); + } + +private: + llvm::ArrayRef + withTrailingSemicolon(llvm::ArrayRef Tokens) const { + assert(!Tokens.empty()); + assert(Tokens.back().kind() != tok::eof); + // (!) we never consume 'eof', so looking at the next token is ok. if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); return Tokens; } -private: /// Finds a token starting at \p L. The token must exist. const syntax::Token *findToken(SourceLocation L) const; @@ -136,6 +161,8 @@ class syntax::TreeBuilder { {&T, NodeAndRole{new (A.allocator()) syntax::Leaf(&T)}}); } + ~Forest() { assert(DelayedFolds.empty()); } + void assignRole(llvm::ArrayRef Range, syntax::NodeRole Role) { assert(!Range.empty()); @@ -148,30 +175,46 @@ class syntax::TreeBuilder { It->second.Role = Role; } - /// Add \p Node to the forest and fill its children nodes based on the \p - /// NodeRange. - void foldChildren(llvm::ArrayRef NodeTokens, + /// Add \p Node to the forest and attach child nodes based on \p Tokens. 
+ void foldChildren(llvm::ArrayRef Tokens, syntax::Tree *Node) { - assert(!NodeTokens.empty()); - assert(Node->firstChild() == nullptr && "node already has children"); - - auto *FirstToken = NodeTokens.begin(); - auto BeginChildren = Trees.lower_bound(FirstToken); - assert(BeginChildren != Trees.end() && - BeginChildren->first == FirstToken && - "fold crosses boundaries of existing subtrees"); - auto EndChildren = Trees.lower_bound(NodeTokens.end()); - assert((EndChildren == Trees.end() || - EndChildren->first == NodeTokens.end()) && - "fold crosses boundaries of existing subtrees"); + // Execute delayed folds inside `Tokens`. + auto BeginExecuted = DelayedFolds.lower_bound(Tokens.begin()); + auto It = BeginExecuted; + for (; It != DelayedFolds.end() && It->second.End <= Tokens.end(); ++It) + foldChildrenEager(llvm::makeArrayRef(It->first, It->second.End), + It->second.Node); + DelayedFolds.erase(BeginExecuted, It); + + // Attach children to `Node`. + foldChildrenEager(Tokens, Node); + } - // (!) we need to go in reverse order, because we can only prepend. - for (auto It = EndChildren; It != BeginChildren; --It) - Node->prependChildLowLevel(std::prev(It)->second.Node, - std::prev(It)->second.Role); + /// Schedule a call to `foldChildren` that will only be executed when + /// containing node is folded. The range of delayed nodes can be extended by + /// calling `extendDelayedFold`. Only one delayed node for each starting + /// token is allowed. + void foldChildrenDelayed(llvm::ArrayRef Tokens, + syntax::Tree *Node) { + assert(!Tokens.empty()); + bool Inserted = + DelayedFolds.insert({Tokens.begin(), DelayedFold{Tokens.end(), Node}}) + .second; + (void)Inserted; + assert(Inserted && "Multiple delayed folds start at the same token"); + } - Trees.erase(BeginChildren, EndChildren); - Trees.insert({FirstToken, NodeAndRole(Node)}); + /// If there a delayed fold, starting at `ExtendedRange.begin()`, extends + /// its endpoint to `ExtendedRange.end()` and returns true. 
+ /// Otherwise, returns false. + bool extendDelayedFold(llvm::ArrayRef ExtendedRange) { + assert(!ExtendedRange.empty()); + auto It = DelayedFolds.find(ExtendedRange.data()); + if (It == DelayedFolds.end()) + return false; + assert(It->second.End <= ExtendedRange.end()); + It->second.End = ExtendedRange.end(); + return true; } // EXPECTS: all tokens were consumed and are owned by a single root node. @@ -199,6 +242,30 @@ class syntax::TreeBuilder { } private: + /// Implementation detail of `foldChildren`, does acutal folding ignoring + /// delayed folds. + void foldChildrenEager(llvm::ArrayRef Tokens, + syntax::Tree *Node) { + assert(Node->firstChild() == nullptr && "node already has children"); + + auto *FirstToken = Tokens.begin(); + auto BeginChildren = Trees.lower_bound(FirstToken); + assert((BeginChildren == Trees.end() || + BeginChildren->first == FirstToken) && + "fold crosses boundaries of existing subtrees"); + auto EndChildren = Trees.lower_bound(Tokens.end()); + assert( + (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && + "fold crosses boundaries of existing subtrees"); + + // (!) we need to go in reverse order, because we can only prepend. + for (auto It = EndChildren; It != BeginChildren; --It) + Node->prependChildLowLevel(std::prev(It)->second.Node, + std::prev(It)->second.Role); + + Trees.erase(BeginChildren, EndChildren); + Trees.insert({FirstToken, NodeAndRole(Node)}); + } /// A with a role that should be assigned to it when adding to a parent. struct NodeAndRole { explicit NodeAndRole(syntax::Node *Node) @@ -214,6 +281,13 @@ class syntax::TreeBuilder { /// FIXME: storing the end tokens is redundant. /// FIXME: the key of a map is redundant, it is also stored in NodeForRange. std::map Trees; + + /// See documentation of `foldChildrenDelayed` for details. + struct DelayedFold { + const syntax::Token *End = nullptr; + syntax::Tree *Node = nullptr; + }; + std::map DelayedFolds; }; /// For debugging purposes. 
@@ -221,6 +295,7 @@ class syntax::TreeBuilder { syntax::Arena &Arena; Forest Pending; + llvm::DenseSet DeclsWithoutSemicolons; }; namespace { @@ -231,20 +306,30 @@ class BuildTreeVisitor : public RecursiveASTVisitor { bool shouldTraversePostOrder() const { return true; } - bool TraverseDecl(Decl *D) { - if (!D || isa(D)) - return RecursiveASTVisitor::TraverseDecl(D); - if (!llvm::isa(D->getDeclContext())) - return true; // Only build top-level decls for now, do not recurse. - return RecursiveASTVisitor::TraverseDecl(D); + bool WalkUpFromDeclaratorDecl(DeclaratorDecl *D) { + // Ensure declarators are covered by SimpleDeclaration. + Builder.noticeDeclaratorRange(Builder.getRange(D)); + // FIXME: build nodes for the declarator too. + return true; + } + bool WalkUpFromTypedefNameDecl(TypedefNameDecl *D) { + // Also a declarator. + Builder.noticeDeclaratorRange(Builder.getRange(D)); + // FIXME: build nodes for the declarator too. + return true; } bool VisitDecl(Decl *D) { - assert(llvm::isa(D->getDeclContext()) && - "expected a top-level decl"); assert(!D->isImplicit()); Builder.foldNode(Builder.getRange(D), - new (allocator()) syntax::TopLevelDeclaration()); + new (allocator()) syntax::UnknownDeclaration()); + return true; + } + + bool WalkUpFromTagDecl(TagDecl *C) { + // Avoid building UnknownDeclaration here, syntatically 'struct X {}' and + // similar are part of declaration specifiers and do not introduce a new + // top-level declaration. return true; } @@ -291,7 +376,11 @@ class BuildTreeVisitor : public RecursiveASTVisitor { } bool TraverseStmt(Stmt *S) { - if (auto *E = llvm::dyn_cast_or_null(S)) { + if (auto *DS = llvm::dyn_cast_or_null(S)) { + // We want to consume the semicolon, make sure SimpleDeclaration does not. + for (auto *D : DS->decls()) + Builder.noticeDeclaratorWithoutSemicolon(D); + } else if (auto *E = llvm::dyn_cast_or_null(S)) { // (!) do not recurse into subexpressions. 
// we do not have syntax trees for expressions yet, so we only want to see // the first top-level expression. @@ -429,6 +518,18 @@ void syntax::TreeBuilder::foldNode(llvm::ArrayRef Range, Pending.foldChildren(Range, New); } +void syntax::TreeBuilder::noticeDeclaratorRange( + llvm::ArrayRef Range) { + if (Pending.extendDelayedFold(Range)) + return; + Pending.foldChildrenDelayed(Range, + new (allocator()) syntax::SimpleDeclaration); +} + +void syntax::TreeBuilder::noticeDeclaratorWithoutSemicolon(Decl *D) { + DeclsWithoutSemicolons.insert(D); +} + void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) { if (Loc.isInvalid()) return; diff --git a/clang/lib/Tooling/Syntax/Nodes.cpp b/clang/lib/Tooling/Syntax/Nodes.cpp index 776330ab585fc..b2ed4ffa22c2b 100644 --- a/clang/lib/Tooling/Syntax/Nodes.cpp +++ b/clang/lib/Tooling/Syntax/Nodes.cpp @@ -16,8 +16,6 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) { return OS << "Leaf"; case NodeKind::TranslationUnit: return OS << "TranslationUnit"; - case NodeKind::TopLevelDeclaration: - return OS << "TopLevelDeclaration"; case NodeKind::UnknownExpression: return OS << "UnknownExpression"; case NodeKind::UnknownStatement: @@ -50,6 +48,10 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) { return OS << "ExpressionStatement"; case NodeKind::CompoundStatement: return OS << "CompoundStatement"; + case NodeKind::UnknownDeclaration: + return OS << "UnknownDeclaration"; + case NodeKind::SimpleDeclaration: + return OS << "SimpleDeclaration"; } llvm_unreachable("unknown node kind"); } diff --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp index c8be48b1361d0..3d30a074ddd82 100644 --- a/clang/unittests/Tooling/Syntax/TreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp @@ -130,7 +130,7 @@ void foo() {} )cpp", R"txt( *: TranslationUnit -|-TopLevelDeclaration +|-SimpleDeclaration | |-int | |-main | |-( @@ -138,7 
+138,7 @@ void foo() {} | `-CompoundStatement | |-{ | `-} -`-TopLevelDeclaration +`-SimpleDeclaration |-void |-foo |-( @@ -157,7 +157,7 @@ int main() { )cpp", R"txt( *: TranslationUnit -`-TopLevelDeclaration +`-SimpleDeclaration |-int |-main |-( @@ -202,7 +202,7 @@ void test() { )cpp", R"txt( *: TranslationUnit -`-TopLevelDeclaration +`-SimpleDeclaration |-void |-test |-( @@ -224,7 +224,7 @@ void test() { {"void test() { int a = 10; }", R"txt( *: TranslationUnit -`-TopLevelDeclaration +`-SimpleDeclaration |-void |-test |-( @@ -232,16 +232,18 @@ void test() { `-CompoundStatement |-{ |-DeclarationStatement - | |-int - | |-a - | |-= - | |-10 + | |-SimpleDeclaration + | | |-int + | | |-a + | | |-= + | | `-UnknownExpression + | | `-10 | `-; `-} )txt"}, {"void test() { ; }", R"txt( *: TranslationUnit -`-TopLevelDeclaration +`-SimpleDeclaration |-void |-test |-( @@ -263,7 +265,7 @@ void test() { )cpp", R"txt( *: TranslationUnit -`-TopLevelDeclaration +`-SimpleDeclaration |-void |-test |-( @@ -299,7 +301,7 @@ void test() { )cpp", R"txt( *: TranslationUnit -`-TopLevelDeclaration +`-SimpleDeclaration |-void |-test |-( @@ -329,7 +331,7 @@ int test() { return 1; } )cpp", R"txt( *: TranslationUnit -`-TopLevelDeclaration +`-SimpleDeclaration |-int |-test |-( @@ -352,7 +354,7 @@ void test() { )cpp", R"txt( *: TranslationUnit -`-TopLevelDeclaration +`-SimpleDeclaration |-void |-test |-( @@ -360,18 +362,21 @@ void test() { `-CompoundStatement |-{ |-DeclarationStatement - | |-int - | |-a - | |-[ - | |-3 - | |-] + | |-SimpleDeclaration + | | |-int + | | |-a + | | |-[ + | | |-UnknownExpression + | | | `-3 + | | `-] | `-; |-RangeBasedForStatement | |-for | |-( - | |-int - | |-x - | |-: + | |-SimpleDeclaration + | | |-int + | | |-x + | | `-: | |-UnknownExpression | | `-a | |-) @@ -384,7 +389,7 @@ void test() { // counterpart. 
{"void main() { foo: return 100; }", R"txt( *: TranslationUnit -`-TopLevelDeclaration +`-SimpleDeclaration |-void |-main |-( @@ -411,7 +416,7 @@ void test() { )cpp", R"txt( *: TranslationUnit -`-TopLevelDeclaration +`-SimpleDeclaration |-void |-test |-( @@ -444,7 +449,70 @@ void test() { | | `-) | `-; `-} -)txt"}}; +)txt"}, + // Multiple declarators group into a single SimpleDeclaration. + {R"cpp( + int *a, b; + )cpp", + R"txt( +*: TranslationUnit +`-SimpleDeclaration + |-int + |-* + |-a + |-, + |-b + `-; + )txt"}, + {R"cpp( + typedef int *a, b; + )cpp", + R"txt( +*: TranslationUnit +`-SimpleDeclaration + |-typedef + |-int + |-* + |-a + |-, + |-b + `-; + )txt"}, + // Multiple declarators inside a statement. + {R"cpp( +void foo() { + int *a, b; + typedef int *ta, tb; +} + )cpp", + R"txt( +*: TranslationUnit +`-SimpleDeclaration + |-void + |-foo + |-( + |-) + `-CompoundStatement + |-{ + |-DeclarationStatement + | |-SimpleDeclaration + | | |-int + | | |-* + | | |-a + | | |-, + | | `-b + | `-; + |-DeclarationStatement + | |-SimpleDeclaration + | | |-typedef + | | |-int + | | |-* + | | |-ta + | | |-, + | | `-tb + | `-; + `-} + )txt"}}; for (const auto &T : Cases) { auto *Root = buildTree(T.first); From e478385e7708d0bcef43559651e6d62e387a507a Mon Sep 17 00:00:00 2001 From: Victor Campos Date: Tue, 19 Nov 2019 09:55:16 +0000 Subject: [PATCH 216/591] [ARM] Fix instruction selection for ARMISD::CMOV with f16 type Summary: In the cases where the CMOV (f16) SDNode is used with condition codes LT, LE, VC or NE, it is successfully selected into a VSEL instruction. In the remaining cases, however, instruction selection fails since VSEL does not support other condition codes. This patch handles such cases by using the single-precision version of the VMOV instruction. 
Reviewers: ostannard, dmgreen Reviewed By: dmgreen Subscribers: kristof.beyls, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70667 --- llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 3 +- llvm/lib/Target/ARM/ARMInstrVFP.td | 6 + llvm/test/CodeGen/ARM/cmov_fp16.ll | 261 +++++++++++++++++++ 3 files changed, 269 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/ARM/cmov_fp16.ll diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 563fdda561049..de4377ec5a471 100644 --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1213,9 +1213,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MBBI = NewMI; return true; } + case ARM::VMOVHcc: case ARM::VMOVScc: case ARM::VMOVDcc: { - unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD; + unsigned newOpc = Opcode != ARM::VMOVDcc ? ARM::VMOVS : ARM::VMOVD; BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc), MI.getOperand(1).getReg()) .add(MI.getOperand(2)) diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td index fdd961bfbb2f7..90be9a0333ed3 100644 --- a/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -2279,6 +2279,12 @@ def VMOVScc : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p), [(set (f32 SPR:$Sd), (ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>, RegConstraint<"$Sn = $Sd">, Requires<[HasFPRegs]>; + +def VMOVHcc : PseudoInst<(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm, cmovpred:$p), + IIC_fpUNA16, + [(set (f16 HPR:$Sd), + (ARMcmov HPR:$Sn, HPR:$Sm, cmovpred:$p))]>, + RegConstraint<"$Sd = $Sn">, Requires<[HasFPRegs]>; } // hasSideEffects //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/ARM/cmov_fp16.ll b/llvm/test/CodeGen/ARM/cmov_fp16.ll new file mode 100644 index 0000000000000..925fed5828112 --- /dev/null +++ 
b/llvm/test/CodeGen/ARM/cmov_fp16.ll @@ -0,0 +1,261 @@ +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes CHECK-THUMB,CHECK +; RUN: llc -mtriple=armv8.2a-arm-none-eabi -mattr=+fullfp16 %s -o - | FileCheck %s --check-prefixes CHECK-ARM,CHECK + +define i32 @test_ne(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-LABEL: test_ne: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: vmov s0, r1 +; CHECK-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-NEXT: vseleq.f16 s0, s0, s2 +; CHECK-NEXT: vmov.f16 r0, s0 +; CHECK-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp ne i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_eq(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-LABEL: test_eq: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s2, r1 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-NEXT: vseleq.f16 s0, s0, s2 +; CHECK-NEXT: vmov.f16 r0, s0 +; CHECK-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp eq i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_gt(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-LABEL: test_gt: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s2, r1 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-NEXT: vselgt.f16 s0, s0, s2 +; CHECK-NEXT: vmov.f16 r0, s0 +; CHECK-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp sgt i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 
+ %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_ge(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-LABEL: test_ge: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s2, r1 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-NEXT: vselge.f16 s0, s0, s2 +; CHECK-NEXT: vmov.f16 r0, s0 +; CHECK-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp sge i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_lt(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-LABEL: test_lt: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: vmov s0, r1 +; CHECK-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-NEXT: vselge.f16 s0, s0, s2 +; CHECK-NEXT: vmov.f16 r0, s0 +; CHECK-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp slt i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_le(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-LABEL: test_le: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: cmp r2, r3 +; CHECK-NEXT: vmov s0, r1 +; CHECK-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-NEXT: vselgt.f16 s0, s0, s2 +; CHECK-NEXT: vmov.f16 r0, s0 +; CHECK-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp sle i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_hi(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-THUMB-LABEL: test_hi: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: vmov s2, r0 +; CHECK-THUMB-NEXT: cmp r2, r3 +; CHECK-THUMB-NEXT: 
vmov s0, r1 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-THUMB-NEXT: it hi +; CHECK-THUMB-NEXT: vmovhi.f32 s0, s2 +; CHECK-THUMB-NEXT: vmov.f16 r0, s0 +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ARM-LABEL: test_hi: +; CHECK-ARM: @ %bb.0: @ %entry +; CHECK-ARM-NEXT: vmov s2, r0 +; CHECK-ARM-NEXT: cmp r2, r3 +; CHECK-ARM-NEXT: vmov s0, r1 +; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-ARM-NEXT: vmovhi.f32 s0, s2 +; CHECK-ARM-NEXT: vmov.f16 r0, s0 +; CHECK-ARM-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp ugt i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_hs(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-THUMB-LABEL: test_hs: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: vmov s2, r0 +; CHECK-THUMB-NEXT: cmp r2, r3 +; CHECK-THUMB-NEXT: vmov s0, r1 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-THUMB-NEXT: it hs +; CHECK-THUMB-NEXT: vmovhs.f32 s0, s2 +; CHECK-THUMB-NEXT: vmov.f16 r0, s0 +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ARM-LABEL: test_hs: +; CHECK-ARM: @ %bb.0: @ %entry +; CHECK-ARM-NEXT: vmov s2, r0 +; CHECK-ARM-NEXT: cmp r2, r3 +; CHECK-ARM-NEXT: vmov s0, r1 +; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-ARM-NEXT: vmovhs.f32 s0, s2 +; CHECK-ARM-NEXT: vmov.f16 r0, s0 +; CHECK-ARM-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp uge i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_lo(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-THUMB-LABEL: test_lo: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: vmov s2, r0 +; CHECK-THUMB-NEXT: cmp r2, r3 +; CHECK-THUMB-NEXT: vmov 
s0, r1 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-THUMB-NEXT: it lo +; CHECK-THUMB-NEXT: vmovlo.f32 s0, s2 +; CHECK-THUMB-NEXT: vmov.f16 r0, s0 +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ARM-LABEL: test_lo: +; CHECK-ARM: @ %bb.0: @ %entry +; CHECK-ARM-NEXT: vmov s2, r0 +; CHECK-ARM-NEXT: cmp r2, r3 +; CHECK-ARM-NEXT: vmov s0, r1 +; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-ARM-NEXT: vmovlo.f32 s0, s2 +; CHECK-ARM-NEXT: vmov.f16 r0, s0 +; CHECK-ARM-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp ult i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + +define i32 @test_ls(i32 %x, i32 %y, i32 %a, i32 %b) { +; CHECK-THUMB-LABEL: test_ls: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: vmov s2, r0 +; CHECK-THUMB-NEXT: cmp r2, r3 +; CHECK-THUMB-NEXT: vmov s0, r1 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-THUMB-NEXT: it ls +; CHECK-THUMB-NEXT: vmovls.f32 s0, s2 +; CHECK-THUMB-NEXT: vmov.f16 r0, s0 +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ARM-LABEL: test_ls: +; CHECK-ARM: @ %bb.0: @ %entry +; CHECK-ARM-NEXT: vmov s2, r0 +; CHECK-ARM-NEXT: cmp r2, r3 +; CHECK-ARM-NEXT: vmov s0, r1 +; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-ARM-NEXT: vmovls.f32 s0, s2 +; CHECK-ARM-NEXT: vmov.f16 r0, s0 +; CHECK-ARM-NEXT: bx lr +entry: + %x.half = uitofp i32 %x to half + %y.half = uitofp i32 %y to half + %cmp = icmp ule i32 %a, %b + %cond = select i1 %cmp, half %x.half, half %y.half + %0 = bitcast half %cond to i16 + %1 = zext i16 %0 to i32 + ret i32 %1 +} + From 38870af8594726edf32aa0fd8fd9e8916df333af Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Thu, 28 Nov 2019 16:22:44 +0100 Subject: [PATCH 217/591] [lldb] Remove FileSpec->CompileUnit inheritance Summary: 
CompileUnit is a complicated class. Having it be implicitly convertible to a FileSpec makes reasoning about it even harder. This patch replaces the inheritance by a simple member and an accessor function. This avoids the need for casting in places where one needed to force a CompileUnit to be treated as a FileSpec, and does not add much verbosity elsewhere. It also fixes a bug where we were wrongly comparing CompileUnit& and a CompileUnit*, which compiled due to a combination of this inheritance and the FileSpec*->FileSpec implicit constructor. Reviewers: teemperor, JDevlieghere, jdoerfert Subscribers: lldb-commits Tags: #lldb Differential Revision: https://reviews.llvm.org/D70827 --- lldb/include/lldb/Symbol/CompileUnit.h | 10 ++++---- lldb/source/API/SBCompileUnit.cpp | 4 ++-- lldb/source/Breakpoint/Breakpoint.cpp | 3 ++- lldb/source/Breakpoint/BreakpointLocation.cpp | 2 +- .../BreakpointResolverFileRegex.cpp | 2 +- lldb/source/Commands/CommandCompletions.cpp | 8 ++++--- lldb/source/Commands/CommandObjectSource.cpp | 23 ++++++++++-------- lldb/source/Commands/CommandObjectTarget.cpp | 17 +++++++------ lldb/source/Commands/CommandObjectThread.cpp | 2 +- lldb/source/Core/FileLineResolver.cpp | 4 ++-- lldb/source/Core/FormatEntity.cpp | 3 +-- lldb/source/Core/Module.cpp | 3 ++- lldb/source/Core/SearchFilter.cpp | 16 ++++++++----- lldb/source/Core/SourceManager.cpp | 11 +++++---- .../Breakpad/SymbolFileBreakpad.cpp | 2 +- .../SymbolFile/DWARF/SymbolFileDWARF.cpp | 5 ++-- .../DWARF/SymbolFileDWARFDebugMap.cpp | 2 +- .../Plugins/SymbolFile/PDB/SymbolFilePDB.cpp | 3 +-- lldb/source/Symbol/CompileUnit.cpp | 24 ++++++------------- lldb/source/Symbol/Function.cpp | 3 ++- lldb/source/Symbol/SymbolContext.cpp | 5 ++-- lldb/tools/lldb-test/lldb-test.cpp | 6 +++-- 22 files changed, 82 insertions(+), 76 deletions(-) diff --git a/lldb/include/lldb/Symbol/CompileUnit.h b/lldb/include/lldb/Symbol/CompileUnit.h index b5f37f6789007..aec5cc7c8743b 100644 --- 
a/lldb/include/lldb/Symbol/CompileUnit.h +++ b/lldb/include/lldb/Symbol/CompileUnit.h @@ -13,6 +13,7 @@ #include "lldb/Core/ModuleChild.h" #include "lldb/Symbol/DebugMacros.h" #include "lldb/Symbol/Function.h" +#include "lldb/Symbol/LineTable.h" #include "lldb/Symbol/SourceModule.h" #include "lldb/Utility/Stream.h" #include "lldb/Utility/UserID.h" @@ -35,7 +36,6 @@ namespace lldb_private { /// table. class CompileUnit : public std::enable_shared_from_this, public ModuleChild, - public FileSpec, public UserID, public SymbolContextScope { public: @@ -116,9 +116,6 @@ class CompileUnit : public std::enable_shared_from_this, const FileSpec &file_spec, lldb::user_id_t uid, lldb::LanguageType language, lldb_private::LazyBool is_optimized); - /// Destructor - ~CompileUnit() override; - /// Add a function to this compile unit. /// /// Typically called by the SymbolFile plug-ins as they partially parse the @@ -225,6 +222,9 @@ class CompileUnit : public std::enable_shared_from_this, const FileSpec *file_spec_ptr, bool exact, LineEntry *line_entry); + /// Return the primary source file associated with this compile unit. + const FileSpec &GetPrimaryFile() const { return m_file_spec; } + /// Get the line table for the compile unit. /// /// Called by clients and the SymbolFile plug-in. The SymbolFile plug-ins @@ -415,6 +415,8 @@ class CompileUnit : public std::enable_shared_from_this, /// All modules, including the current module, imported by this /// compile unit. std::vector m_imported_modules; + /// The primary file associated with this compile unit. + FileSpec m_file_spec; /// Files associated with this compile unit's line table and /// declarations. 
FileSpecList m_support_files; diff --git a/lldb/source/API/SBCompileUnit.cpp b/lldb/source/API/SBCompileUnit.cpp index 581bda3635073..d52040d850a95 100644 --- a/lldb/source/API/SBCompileUnit.cpp +++ b/lldb/source/API/SBCompileUnit.cpp @@ -50,7 +50,7 @@ SBFileSpec SBCompileUnit::GetFileSpec() const { SBFileSpec file_spec; if (m_opaque_ptr) - file_spec.SetFileSpec(*m_opaque_ptr); + file_spec.SetFileSpec(m_opaque_ptr->GetPrimaryFile()); return LLDB_RECORD_RESULT(file_spec); } @@ -106,7 +106,7 @@ uint32_t SBCompileUnit::FindLineEntryIndex(uint32_t start_idx, uint32_t line, if (inline_file_spec && inline_file_spec->IsValid()) file_spec = inline_file_spec->ref(); else - file_spec = *m_opaque_ptr; + file_spec = m_opaque_ptr->GetPrimaryFile(); index = m_opaque_ptr->FindLineEntry( start_idx, line, inline_file_spec ? inline_file_spec->get() : nullptr, diff --git a/lldb/source/Breakpoint/Breakpoint.cpp b/lldb/source/Breakpoint/Breakpoint.cpp index a112542803c47..3ee9ece56776d 100644 --- a/lldb/source/Breakpoint/Breakpoint.cpp +++ b/lldb/source/Breakpoint/Breakpoint.cpp @@ -638,7 +638,8 @@ static bool SymbolContextsMightBeEquivalent(SymbolContext &old_sc, } else { // Otherwise we will compare by name... 
if (old_sc.comp_unit && new_sc.comp_unit) { - if (FileSpec::Equal(*old_sc.comp_unit, *new_sc.comp_unit, true)) { + if (FileSpec::Equal(old_sc.comp_unit->GetPrimaryFile(), + new_sc.comp_unit->GetPrimaryFile(), true)) { // Now check the functions: if (old_sc.function && new_sc.function && (old_sc.function->GetName() == new_sc.function->GetName())) { diff --git a/lldb/source/Breakpoint/BreakpointLocation.cpp b/lldb/source/Breakpoint/BreakpointLocation.cpp index 46b8f25c56682..e6d7d85f90605 100644 --- a/lldb/source/Breakpoint/BreakpointLocation.cpp +++ b/lldb/source/Breakpoint/BreakpointLocation.cpp @@ -525,7 +525,7 @@ void BreakpointLocation::GetDescription(Stream *s, if (sc.comp_unit != nullptr) { s->EOL(); s->Indent("compile unit = "); - static_cast(sc.comp_unit)->GetFilename().Dump(s); + sc.comp_unit->GetPrimaryFile().GetFilename().Dump(s); if (sc.function != nullptr) { s->EOL(); diff --git a/lldb/source/Breakpoint/BreakpointResolverFileRegex.cpp b/lldb/source/Breakpoint/BreakpointResolverFileRegex.cpp index 3cb04263c6dcb..6b600a7cf128f 100644 --- a/lldb/source/Breakpoint/BreakpointResolverFileRegex.cpp +++ b/lldb/source/Breakpoint/BreakpointResolverFileRegex.cpp @@ -102,7 +102,7 @@ Searcher::CallbackReturn BreakpointResolverFileRegex::SearchCallback( return eCallbackReturnContinue; CompileUnit *cu = context.comp_unit; - FileSpec cu_file_spec = *(static_cast(cu)); + FileSpec cu_file_spec = cu->GetPrimaryFile(); std::vector line_matches; context.target_sp->GetSourceManager().FindLinesMatchingRegex( cu_file_spec, m_regex, 1, UINT32_MAX, line_matches); diff --git a/lldb/source/Commands/CommandCompletions.cpp b/lldb/source/Commands/CommandCompletions.cpp index 469a6bbbadf65..d325b724a38fc 100644 --- a/lldb/source/Commands/CommandCompletions.cpp +++ b/lldb/source/Commands/CommandCompletions.cpp @@ -378,8 +378,10 @@ CommandCompletions::SourceFileCompleter::SearchCallback(SearchFilter &filter, } } } else { - const char *cur_file_name = 
context.comp_unit->GetFilename().GetCString(); - const char *cur_dir_name = context.comp_unit->GetDirectory().GetCString(); + const char *cur_file_name = + context.comp_unit->GetPrimaryFile().GetFilename().GetCString(); + const char *cur_dir_name = + context.comp_unit->GetPrimaryFile().GetDirectory().GetCString(); bool match = false; if (m_file_name && cur_file_name && @@ -391,7 +393,7 @@ CommandCompletions::SourceFileCompleter::SearchCallback(SearchFilter &filter, match = false; if (match) { - m_matching_files.AppendIfUnique(context.comp_unit); + m_matching_files.AppendIfUnique(context.comp_unit->GetPrimaryFile()); } } } diff --git a/lldb/source/Commands/CommandObjectSource.cpp b/lldb/source/Commands/CommandObjectSource.cpp index fd1b158afb16d..f2591b4f62563 100644 --- a/lldb/source/Commands/CommandObjectSource.cpp +++ b/lldb/source/Commands/CommandObjectSource.cpp @@ -256,7 +256,8 @@ class CommandObjectSourceInfo : public CommandObjectParsed { if (num_matches > 0) strm << "\n\n"; strm << "Lines found for file " << file_spec_name - << " in compilation unit " << cu->GetFilename() << " in `" + << " in compilation unit " + << cu->GetPrimaryFile().GetFilename() << " in `" << module_file_name << "\n"; cu_header_printed = true; } @@ -1077,7 +1078,8 @@ class CommandObjectSourceList : public CommandObjectParsed { if (m_options.show_bp_locs) { m_breakpoint_locations.Clear(); const bool show_inlines = true; - m_breakpoint_locations.Reset(*sc.comp_unit, 0, show_inlines); + m_breakpoint_locations.Reset(sc.comp_unit->GetPrimaryFile(), 0, + show_inlines); SearchFilterForUnconstrainedSearches target_search_filter( target->shared_from_this()); target_search_filter.Search(m_breakpoint_locations); @@ -1106,8 +1108,8 @@ class CommandObjectSourceList : public CommandObjectParsed { ? 
sc.line_entry.column : 0; target->GetSourceManager().DisplaySourceLinesWithLineNumbers( - sc.comp_unit, sc.line_entry.line, column, lines_to_back_up, - m_options.num_lines - lines_to_back_up, "->", + sc.comp_unit->GetPrimaryFile(), sc.line_entry.line, column, + lines_to_back_up, m_options.num_lines - lines_to_back_up, "->", &result.GetOutputStream(), GetBreakpointLocations()); result.SetStatus(eReturnStatusSuccessFinishResult); } @@ -1190,18 +1192,18 @@ class CommandObjectSourceList : public CommandObjectParsed { if (num_matches > 1) { bool got_multiple = false; - FileSpec *test_cu_spec = nullptr; + CompileUnit *test_cu = nullptr; for (unsigned i = 0; i < num_matches; i++) { SymbolContext sc; sc_list.GetContextAtIndex(i, sc); if (sc.comp_unit) { - if (test_cu_spec) { - if (test_cu_spec != static_cast(sc.comp_unit)) + if (test_cu) { + if (test_cu != sc.comp_unit) got_multiple = true; break; } else - test_cu_spec = sc.comp_unit; + test_cu = sc.comp_unit; } } if (got_multiple) { @@ -1218,7 +1220,8 @@ class CommandObjectSourceList : public CommandObjectParsed { if (sc.comp_unit) { if (m_options.show_bp_locs) { const bool show_inlines = true; - m_breakpoint_locations.Reset(*sc.comp_unit, 0, show_inlines); + m_breakpoint_locations.Reset(sc.comp_unit->GetPrimaryFile(), 0, + show_inlines); SearchFilterForUnconstrainedSearches target_search_filter( target->shared_from_this()); target_search_filter.Search(m_breakpoint_locations); @@ -1229,7 +1232,7 @@ class CommandObjectSourceList : public CommandObjectParsed { m_options.num_lines = 10; const uint32_t column = 0; target->GetSourceManager().DisplaySourceLinesWithLineNumbers( - sc.comp_unit, m_options.start_line, column, 0, + sc.comp_unit->GetPrimaryFile(), m_options.start_line, column, 0, m_options.num_lines, "", &result.GetOutputStream(), GetBreakpointLocations()); diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp index d77207bb82cfc..9f4e58e55e5dd 100644 --- 
a/lldb/source/Commands/CommandObjectTarget.cpp +++ b/lldb/source/Commands/CommandObjectTarget.cpp @@ -816,15 +816,14 @@ class CommandObjectTargetVariable : public CommandObjectParsed { return; if (sc.module_sp) { if (sc.comp_unit) { - s.Printf("Global variables for %s in %s:\n", - sc.comp_unit->GetPath().c_str(), - sc.module_sp->GetFileSpec().GetPath().c_str()); + s.Format("Global variables for {0} in {1}:\n", + sc.comp_unit->GetPrimaryFile(), sc.module_sp->GetFileSpec()); } else { s.Printf("Global variables for %s\n", sc.module_sp->GetFileSpec().GetPath().c_str()); } } else if (sc.comp_unit) { - s.Printf("Global variables for %s\n", sc.comp_unit->GetPath().c_str()); + s.Format("Global variables for {0}\n", sc.comp_unit->GetPrimaryFile()); } for (VariableSP var_sp : variable_list) { @@ -926,9 +925,9 @@ class CommandObjectTargetVariable : public CommandObjectParsed { if (!success) { if (frame) { if (comp_unit) - result.AppendErrorWithFormat( - "no global variables in current compile unit: %s\n", - comp_unit->GetPath().c_str()); + result.AppendErrorWithFormatv( + "no global variables in current compile unit: {0}\n", + comp_unit->GetPrimaryFile()); else result.AppendErrorWithFormat( "no debug information for frame %u\n", @@ -1327,8 +1326,8 @@ static uint32_t DumpCompileUnitLineTable(CommandInterpreter &interpreter, if (i > 0) strm << "\n\n"; - strm << "Line table for " << *static_cast(sc.comp_unit) - << " in `" << module->GetFileSpec().GetFilename() << "\n"; + strm << "Line table for " << sc.comp_unit->GetPrimaryFile() << " in `" + << module->GetFileSpec().GetFilename() << "\n"; LineTable *line_table = sc.comp_unit->GetLineTable(); if (line_table) line_table->GetDescription( diff --git a/lldb/source/Commands/CommandObjectThread.cpp b/lldb/source/Commands/CommandObjectThread.cpp index a74eec01933b2..13c17dfe3cca2 100644 --- a/lldb/source/Commands/CommandObjectThread.cpp +++ b/lldb/source/Commands/CommandObjectThread.cpp @@ -1193,7 +1193,7 @@ class 
CommandObjectThreadUntil : public CommandObjectParsed { LineEntry line_entry; const bool exact = false; start_idx_ptr = sc.comp_unit->FindLineEntry( - start_idx_ptr, line_number, sc.comp_unit, exact, &line_entry); + start_idx_ptr, line_number, nullptr, exact, &line_entry); if (start_idx_ptr == UINT32_MAX) break; diff --git a/lldb/source/Core/FileLineResolver.cpp b/lldb/source/Core/FileLineResolver.cpp index 01df295398a83..7d91d1a3e472c 100644 --- a/lldb/source/Core/FileLineResolver.cpp +++ b/lldb/source/Core/FileLineResolver.cpp @@ -36,8 +36,8 @@ FileLineResolver::SearchCallback(SearchFilter &filter, SymbolContext &context, Address *addr) { CompileUnit *cu = context.comp_unit; - if (m_inlines || - m_file_spec.Compare(*cu, m_file_spec, (bool)m_file_spec.GetDirectory())) { + if (m_inlines || m_file_spec.Compare(cu->GetPrimaryFile(), m_file_spec, + (bool)m_file_spec.GetDirectory())) { uint32_t start_file_idx = 0; uint32_t file_idx = cu->GetSupportFiles().FindFileIndex(start_file_idx, m_file_spec, false); diff --git a/lldb/source/Core/FormatEntity.cpp b/lldb/source/Core/FormatEntity.cpp index c90828f40989c..07ca0a68a10b4 100644 --- a/lldb/source/Core/FormatEntity.cpp +++ b/lldb/source/Core/FormatEntity.cpp @@ -1376,8 +1376,7 @@ bool FormatEntity::Format(const Entry &entry, Stream &s, if (sc) { CompileUnit *cu = sc->comp_unit; if (cu) { - // CompileUnit is a FileSpec - if (DumpFile(s, *cu, (FileKind)entry.number)) + if (DumpFile(s, cu->GetPrimaryFile(), (FileKind)entry.number)) return true; } } diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp index a14bd3d370a1b..360c8c1345462 100644 --- a/lldb/source/Core/Module.cpp +++ b/lldb/source/Core/Module.cpp @@ -617,7 +617,8 @@ void Module::FindCompileUnits(const FileSpec &path, for (size_t i = 0; i < num_compile_units; ++i) { sc.comp_unit = GetCompileUnitAtIndex(i).get(); if (sc.comp_unit) { - if (FileSpec::Equal(*sc.comp_unit, path, compare_directory)) + if 
(FileSpec::Equal(sc.comp_unit->GetPrimaryFile(), path, + compare_directory)) sc_list.Append(sc); } } diff --git a/lldb/source/Core/SearchFilter.cpp b/lldb/source/Core/SearchFilter.cpp index 8f80caa3eb4de..c49b59e601e18 100644 --- a/lldb/source/Core/SearchFilter.cpp +++ b/lldb/source/Core/SearchFilter.cpp @@ -726,8 +726,11 @@ bool SearchFilterByModuleListAndCU::AddressPasses(Address &address) { if (m_cu_spec_list.GetSize() != 0) return false; // Has no comp_unit so can't pass the file check. } - if (m_cu_spec_list.FindFileIndex(0, sym_ctx.comp_unit, false) == UINT32_MAX) - return false; // Fails the file check + FileSpec cu_spec; + if (sym_ctx.comp_unit) + cu_spec = sym_ctx.comp_unit->GetPrimaryFile(); + if (m_cu_spec_list.FindFileIndex(0, cu_spec, false) == UINT32_MAX) + return false; // Fails the file check return SearchFilterByModuleList::ModulePasses(sym_ctx.module_sp); } @@ -736,8 +739,8 @@ bool SearchFilterByModuleListAndCU::CompUnitPasses(FileSpec &fileSpec) { } bool SearchFilterByModuleListAndCU::CompUnitPasses(CompileUnit &compUnit) { - bool in_cu_list = - m_cu_spec_list.FindFileIndex(0, compUnit, false) != UINT32_MAX; + bool in_cu_list = m_cu_spec_list.FindFileIndex(0, compUnit.GetPrimaryFile(), + false) != UINT32_MAX; if (in_cu_list) { ModuleSP module_sp(compUnit.GetModule()); if (module_sp) { @@ -787,8 +790,9 @@ void SearchFilterByModuleListAndCU::Search(Searcher &searcher) { CompUnitSP cu_sp = module_sp->GetCompileUnitAtIndex(cu_idx); matchingContext.comp_unit = cu_sp.get(); if (matchingContext.comp_unit) { - if (m_cu_spec_list.FindFileIndex(0, *matchingContext.comp_unit, - false) != UINT32_MAX) { + if (m_cu_spec_list.FindFileIndex( + 0, matchingContext.comp_unit->GetPrimaryFile(), false) != + UINT32_MAX) { shouldContinue = DoCUIteration(module_sp, matchingContext, searcher); if (shouldContinue == Searcher::eCallbackReturnStop) diff --git a/lldb/source/Core/SourceManager.cpp b/lldb/source/Core/SourceManager.cpp index 42741e4ba4fe4..e3780e0b071af 100644 
--- a/lldb/source/Core/SourceManager.cpp +++ b/lldb/source/Core/SourceManager.cpp @@ -399,24 +399,25 @@ void SourceManager::File::CommonInitializer(const FileSpec &file_spec, if (num_matches != 0) { if (num_matches > 1) { SymbolContext sc; - FileSpec *test_cu_spec = nullptr; + CompileUnit *test_cu = nullptr; for (unsigned i = 0; i < num_matches; i++) { sc_list.GetContextAtIndex(i, sc); if (sc.comp_unit) { - if (test_cu_spec) { - if (test_cu_spec != static_cast(sc.comp_unit)) + if (test_cu) { + if (test_cu != sc.comp_unit) got_multiple = true; break; } else - test_cu_spec = sc.comp_unit; + test_cu = sc.comp_unit; } } } if (!got_multiple) { SymbolContext sc; sc_list.GetContextAtIndex(0, sc); - m_file_spec = sc.comp_unit; + if (sc.comp_unit) + m_file_spec = sc.comp_unit->GetPrimaryFile(); m_mod_time = FileSystem::Instance().GetModificationTime(m_file_spec); } } diff --git a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp index 29d2e8a0c6a84..b2c4d08833414 100644 --- a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp +++ b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp @@ -731,7 +731,7 @@ void SymbolFileBreakpad::ParseLineTableAndSupportFiles(CompileUnit &cu, } if (next_addr) finish_sequence(); - data.support_files = map.translate(cu, *m_files); + data.support_files = map.translate(cu.GetPrimaryFile(), *m_files); } void SymbolFileBreakpad::ParseUnwindData() { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index fcdff01dd20b9..837a475c166c8 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -1046,7 +1046,8 @@ bool SymbolFileDWARF::ParseLineTable(CompileUnit &comp_unit) { comp_unit.SetSupportFiles(ParseSupportFilesFromPrologue( comp_unit.GetModule(), line_table->Prologue, dwarf_cu->GetPathStyle(), - 
dwarf_cu->GetCompilationDirectory().GetCString(), FileSpec(comp_unit))); + dwarf_cu->GetCompilationDirectory().GetCString(), + comp_unit.GetPrimaryFile())); return true; } @@ -1951,7 +1952,7 @@ uint32_t SymbolFileDWARF::ResolveSymbolContext(const FileSpec &file_spec, const bool full_match = (bool)file_spec.GetDirectory(); bool file_spec_matches_cu_file_spec = - FileSpec::Equal(file_spec, *dc_cu, full_match); + FileSpec::Equal(file_spec, dc_cu->GetPrimaryFile(), full_match); if (check_inlines || file_spec_matches_cu_file_spec) { SymbolContext sc(m_objfile_sp->GetModule()); sc.comp_unit = dc_cu; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp index dbdbf49929412..d3090ed3b6f2c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp @@ -604,7 +604,7 @@ SymbolFileDWARFDebugMap::CompileUnitInfo * SymbolFileDWARFDebugMap::GetCompUnitInfo(const CompileUnit &comp_unit) { const uint32_t cu_count = GetNumCompileUnits(); for (uint32_t i = 0; i < cu_count; ++i) { - if (comp_unit == m_compile_unit_infos[i].compile_unit_sp.get()) + if (&comp_unit == m_compile_unit_infos[i].compile_unit_sp.get()) return &m_compile_unit_infos[i]; } return nullptr; diff --git a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp index e7bc730ca38b8..dcbefdcbb6f89 100644 --- a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp @@ -373,7 +373,7 @@ bool SymbolFilePDB::ParseSupportFiles( // LLDB uses the DWARF-like file numeration (one based), // the zeroth file is the compile unit itself - support_files.Insert(0, comp_unit); + support_files.Insert(0, comp_unit.GetPrimaryFile()); return true; } @@ -1780,7 +1780,6 @@ bool SymbolFilePDB::ParseCompileUnitLineTable(CompileUnit &comp_unit, auto line_table 
= std::make_unique(&comp_unit); // Find contributions to `compiland` from all source and header files. - std::string path = comp_unit.GetPath(); auto files = m_session_up->getSourceFilesForCompiland(*compiland_up); if (!files) return false; diff --git a/lldb/source/Symbol/CompileUnit.cpp b/lldb/source/Symbol/CompileUnit.cpp index 82074367ec8fb..6aef807f86dca 100644 --- a/lldb/source/Symbol/CompileUnit.cpp +++ b/lldb/source/Symbol/CompileUnit.cpp @@ -21,30 +21,21 @@ CompileUnit::CompileUnit(const lldb::ModuleSP &module_sp, void *user_data, const char *pathname, const lldb::user_id_t cu_sym_id, lldb::LanguageType language, lldb_private::LazyBool is_optimized) - : ModuleChild(module_sp), FileSpec(pathname), UserID(cu_sym_id), - m_user_data(user_data), m_language(language), m_flags(0), - m_support_files(), m_line_table_up(), m_variables(), - m_is_optimized(is_optimized) { - if (language != eLanguageTypeUnknown) - m_flags.Set(flagsParsedLanguage); - assert(module_sp); -} + : CompileUnit(module_sp, user_data, FileSpec(pathname), cu_sym_id, language, + is_optimized) {} CompileUnit::CompileUnit(const lldb::ModuleSP &module_sp, void *user_data, const FileSpec &fspec, const lldb::user_id_t cu_sym_id, lldb::LanguageType language, lldb_private::LazyBool is_optimized) - : ModuleChild(module_sp), FileSpec(fspec), UserID(cu_sym_id), - m_user_data(user_data), m_language(language), m_flags(0), - m_support_files(), m_line_table_up(), m_variables(), + : ModuleChild(module_sp), UserID(cu_sym_id), m_user_data(user_data), + m_language(language), m_flags(0), m_file_spec(fspec), m_is_optimized(is_optimized) { if (language != eLanguageTypeUnknown) m_flags.Set(flagsParsedLanguage); assert(module_sp); } -CompileUnit::~CompileUnit() {} - void CompileUnit::CalculateSymbolContext(SymbolContext *sc) { sc->comp_unit = this; GetModule()->CalculateSymbolContext(sc); @@ -63,7 +54,7 @@ void CompileUnit::GetDescription(Stream *s, lldb::DescriptionLevel level) const { const char *language = 
Language::GetNameForLanguageType(m_language); *s << "id = " << (const UserID &)*this << ", file = \"" - << (const FileSpec &)*this << "\", language = \"" << language << '"'; + << this->GetPrimaryFile() << "\", language = \"" << language << '"'; } void CompileUnit::ForeachFunction( @@ -117,8 +108,7 @@ void CompileUnit::Dump(Stream *s, bool show_context) const { s->Printf("%p: ", static_cast(this)); s->Indent(); *s << "CompileUnit" << static_cast(*this) << ", language = \"" - << language << "\", file = '" << static_cast(*this) - << "'\n"; + << language << "\", file = '" << GetPrimaryFile() << "'\n"; // m_types.Dump(s); @@ -255,7 +245,7 @@ void CompileUnit::ResolveSymbolContext(const FileSpec &file_spec, std::vector file_indexes; const bool full_match = (bool)file_spec.GetDirectory(); bool file_spec_matches_cu_file_spec = - FileSpec::Equal(file_spec, *this, full_match); + FileSpec::Equal(file_spec, this->GetPrimaryFile(), full_match); // If we are not looking for inlined functions and our file spec doesn't // match then we are done... 
diff --git a/lldb/source/Symbol/Function.cpp b/lldb/source/Symbol/Function.cpp index 9e81b6140eb76..c392317df0066 100644 --- a/lldb/source/Symbol/Function.cpp +++ b/lldb/source/Symbol/Function.cpp @@ -340,7 +340,8 @@ Block &Function::GetBlock(bool can_create) { "error: unable to find module " "shared pointer for function '%s' " "in %s\n", - GetName().GetCString(), m_comp_unit->GetPath().c_str()); + GetName().GetCString(), + m_comp_unit->GetPrimaryFile().GetPath().c_str()); } m_block.SetBlockInfoHasBeenParsed(true, true); } diff --git a/lldb/source/Symbol/SymbolContext.cpp b/lldb/source/Symbol/SymbolContext.cpp index c5d8547b08c8b..11548c0a5a195 100644 --- a/lldb/source/Symbol/SymbolContext.cpp +++ b/lldb/source/Symbol/SymbolContext.cpp @@ -316,7 +316,7 @@ void SymbolContext::Dump(Stream *s, Target *target) const { *s << "CompileUnit = " << comp_unit; if (comp_unit != nullptr) s->Format(" {{{0:x-16}} {1}", comp_unit->GetID(), - *static_cast(comp_unit)); + comp_unit->GetPrimaryFile()); s->EOL(); s->Indent(); *s << "Function = " << function; @@ -1055,7 +1055,8 @@ bool SymbolContextSpecifier::SymbolContextMatches(SymbolContext &sc) { // Next check the comp unit, but only if the SymbolContext was not // inlined. 
if (!was_inlined && sc.comp_unit != nullptr) { - if (!FileSpec::Equal(*(sc.comp_unit), *(m_file_spec_up.get()), false)) + if (!FileSpec::Equal(sc.comp_unit->GetPrimaryFile(), *m_file_spec_up, + false)) return false; } } diff --git a/lldb/tools/lldb-test/lldb-test.cpp b/lldb/tools/lldb-test/lldb-test.cpp index 66c8536301d52..12e4a56059796 100644 --- a/lldb/tools/lldb-test/lldb-test.cpp +++ b/lldb/tools/lldb-test/lldb-test.cpp @@ -549,7 +549,8 @@ Error opts::symbols::findVariables(lldb_private::Module &Module) { CompUnitSP CU; for (size_t Ind = 0; !CU && Ind < Module.GetNumCompileUnits(); ++Ind) { CompUnitSP Candidate = Module.GetCompileUnitAtIndex(Ind); - if (!Candidate || Candidate->GetFilename().GetStringRef() != File) + if (!Candidate || + Candidate->GetPrimaryFile().GetFilename().GetStringRef() != File) continue; if (CU) return make_string_error("Multiple compile units for file `{0}` found.", @@ -653,7 +654,8 @@ Error opts::symbols::verify(lldb_private::Module &Module) { if (!comp_unit) return make_string_error("Connot parse compile unit {0}.", i); - outs() << "Processing '" << comp_unit->GetFilename().AsCString() + outs() << "Processing '" + << comp_unit->GetPrimaryFile().GetFilename().AsCString() << "' compile unit.\n"; LineTable *lt = comp_unit->GetLineTable(); From 4f000824222f97c0cfd5b19951a1068132e57e79 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Fri, 29 Nov 2019 12:05:00 +0100 Subject: [PATCH 218/591] [clangd] Fix 407ac2e, which was broken and committed too soon --- clang-tools-extra/clangd/TUScheduler.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/clangd/TUScheduler.cpp b/clang-tools-extra/clangd/TUScheduler.cpp index b51221d7d9033..884c82d5b1909 100644 --- a/clang-tools-extra/clangd/TUScheduler.cpp +++ b/clang-tools-extra/clangd/TUScheduler.cpp @@ -408,11 +408,11 @@ void ASTWorker::update(ParseInputs Inputs, WantDiagnostics WantDiags) { // Rebuild the preamble and the AST. 
StoreDiags CompilerInvocationDiagConsumer; std::vector CC1Args; - std::unique_ptr Invocation = - buildCompilerInvocation(Inputs, CompilerInvocationDiagConsumer); + std::unique_ptr Invocation = buildCompilerInvocation( + Inputs, CompilerInvocationDiagConsumer, &CC1Args); // Log cc1 args even (especially!) if creating invocation failed. if (!CC1Args.empty()) - vlog("cc1 args: {0}", llvm::join(CC1Args, " ")); + vlog("Driver produced command: cc1 {0}", llvm::join(CC1Args, " ")); std::vector CompilerInvocationDiags = CompilerInvocationDiagConsumer.take(); if (!Invocation) { From d1d6049e9d6600f28746379290705b02ffb52d4b Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Fri, 29 Nov 2019 12:05:47 +0100 Subject: [PATCH 219/591] [lldb][NFC] Remove dead logging code from DWARFASTParserClang::CompleteRecordType This code is behind a `if (log)` that is always a nullptr as the initializer was commented out. One could uncomment the initializer code, but then this logging code just leads to a deadlock as it tries to aquire the module lock. This removes the logging code until I get this working again. 
--- .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 71 +------------------ 1 file changed, 1 insertion(+), 70 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index ba17469ea998f..43030c62cb407 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -1974,8 +1974,6 @@ bool DWARFASTParserClang::CompleteRecordType(const DWARFDIE &die, CompilerType &clang_type) { const dw_tag_t tag = die.Tag(); SymbolFileDWARF *dwarf = die.GetDWARF(); - Log *log = - nullptr; // (LogChannelDWARF::GetLogIfAny(DWARF_LOG_DEBUG_INFO|DWARF_LOG_TYPE_COMPLETION)); ClangASTImporter::LayoutInfo layout_info; @@ -2125,75 +2123,8 @@ bool DWARFASTParserClang::CompleteRecordType(const DWARFDIE &die, clang::CXXRecordDecl *record_decl = m_ast.GetAsCXXRecordDecl(clang_type.GetOpaqueQualType()); - if (record_decl) { - if (log) { - ModuleSP module_sp = dwarf->GetObjectFile()->GetModule(); - - if (module_sp) { - module_sp->LogMessage( - log, - "ClangASTContext::CompleteTypeFromDWARF (clang_type = %p) " - "caching layout info for record_decl = %p, bit_size = %" PRIu64 - ", alignment = %" PRIu64 - ", field_offsets[%u], base_offsets[%u], vbase_offsets[%u])", - static_cast(clang_type.GetOpaqueQualType()), - static_cast(record_decl), layout_info.bit_size, - layout_info.alignment, - static_cast(layout_info.field_offsets.size()), - static_cast(layout_info.base_offsets.size()), - static_cast(layout_info.vbase_offsets.size())); - - uint32_t idx; - { - llvm::DenseMap::const_iterator - pos, - end = layout_info.field_offsets.end(); - for (idx = 0, pos = layout_info.field_offsets.begin(); pos != end; - ++pos, ++idx) { - module_sp->LogMessage( - log, - "ClangASTContext::CompleteTypeFromDWARF (clang_type = " - "%p) field[%u] = { bit_offset=%u, name='%s' }", - static_cast(clang_type.GetOpaqueQualType()), idx, - static_cast(pos->second), 
- pos->first->getNameAsString().c_str()); - } - } - - { - llvm::DenseMap::const_iterator base_pos, - base_end = layout_info.base_offsets.end(); - for (idx = 0, base_pos = layout_info.base_offsets.begin(); - base_pos != base_end; ++base_pos, ++idx) { - module_sp->LogMessage( - log, - "ClangASTContext::CompleteTypeFromDWARF (clang_type = " - "%p) base[%u] = { byte_offset=%u, name='%s' }", - clang_type.GetOpaqueQualType(), idx, - (uint32_t)base_pos->second.getQuantity(), - base_pos->first->getNameAsString().c_str()); - } - } - { - llvm::DenseMap::const_iterator vbase_pos, - vbase_end = layout_info.vbase_offsets.end(); - for (idx = 0, vbase_pos = layout_info.vbase_offsets.begin(); - vbase_pos != vbase_end; ++vbase_pos, ++idx) { - module_sp->LogMessage( - log, - "ClangASTContext::CompleteTypeFromDWARF (clang_type = " - "%p) vbase[%u] = { byte_offset=%u, name='%s' }", - static_cast(clang_type.GetOpaqueQualType()), idx, - static_cast(vbase_pos->second.getQuantity()), - vbase_pos->first->getNameAsString().c_str()); - } - } - } - } + if (record_decl) GetClangASTImporter().InsertRecordDecl(record_decl, layout_info); - } } return (bool)clang_type; From d752b75d7fce2a77bb7656d33d2aa062372dc014 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Fri, 29 Nov 2019 12:26:33 +0100 Subject: [PATCH 220/591] [lldb][NFC] Simplify regex_chars in CommandCompletions --- lldb/source/Commands/CommandCompletions.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lldb/source/Commands/CommandCompletions.cpp b/lldb/source/Commands/CommandCompletions.cpp index d325b724a38fc..b382e26e2b704 100644 --- a/lldb/source/Commands/CommandCompletions.cpp +++ b/lldb/source/Commands/CommandCompletions.cpp @@ -413,10 +413,7 @@ void CommandCompletions::SourceFileCompleter::DoCompletion( // SymbolCompleter static bool regex_chars(const char comp) { - return (comp == '[' || comp == ']' || comp == '(' || comp == ')' || - comp == '{' || comp == '}' || comp == '+' || comp == '.' 
|| - comp == '*' || comp == '|' || comp == '^' || comp == '$' || - comp == '\\' || comp == '?'); + return llvm::StringRef("[](){}+.*|^$\\?").contains(comp); } CommandCompletions::SymbolCompleter::SymbolCompleter( From 656a8123deed31d2d7aee313e87911dc153fa6d3 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Fri, 29 Nov 2019 12:48:25 +0100 Subject: [PATCH 221/591] [lldb] Fix windows build for 38870af --- lldb/unittests/SymbolFile/PDB/SymbolFilePDBTests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/unittests/SymbolFile/PDB/SymbolFilePDBTests.cpp b/lldb/unittests/SymbolFile/PDB/SymbolFilePDBTests.cpp index 0470394d42555..e8a8690c1ff1f 100644 --- a/lldb/unittests/SymbolFile/PDB/SymbolFilePDBTests.cpp +++ b/lldb/unittests/SymbolFile/PDB/SymbolFilePDBTests.cpp @@ -109,7 +109,7 @@ class SymbolFilePDBTests : public testing::Test { const FileSpec &spec) const { for (size_t i = 0; i < sc_list.GetSize(); ++i) { const SymbolContext &sc = sc_list[i]; - if (FileSpecMatchesAsBaseOrFull(*sc.comp_unit, spec)) + if (FileSpecMatchesAsBaseOrFull(sc.comp_unit->GetPrimaryFile(), spec)) return true; } return false; From 76016f9b3a9acdba7728561a7ddfb48b1245dfa7 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Fri, 29 Nov 2019 12:40:19 +0100 Subject: [PATCH 222/591] [lldb][NFC] Early exit in ClangASTContext::CreateInstance --- lldb/source/Symbol/ClangASTContext.cpp | 82 +++++++++++++------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/lldb/source/Symbol/ClangASTContext.cpp b/lldb/source/Symbol/ClangASTContext.cpp index e70b005550d10..adb8d57a74f65 100644 --- a/lldb/source/Symbol/ClangASTContext.cpp +++ b/lldb/source/Symbol/ClangASTContext.cpp @@ -562,47 +562,47 @@ uint32_t ClangASTContext::GetPluginVersion() { return 1; } lldb::TypeSystemSP ClangASTContext::CreateInstance(lldb::LanguageType language, lldb_private::Module *module, Target *target) { - if (ClangASTContextSupportsLanguage(language)) { - ArchSpec arch; - if (module) - 
arch = module->GetArchitecture(); - else if (target) - arch = target->GetArchitecture(); - - if (arch.IsValid()) { - ArchSpec fixed_arch = arch; - // LLVM wants this to be set to iOS or MacOSX; if we're working on - // a bare-boards type image, change the triple for llvm's benefit. - if (fixed_arch.GetTriple().getVendor() == llvm::Triple::Apple && - fixed_arch.GetTriple().getOS() == llvm::Triple::UnknownOS) { - if (fixed_arch.GetTriple().getArch() == llvm::Triple::arm || - fixed_arch.GetTriple().getArch() == llvm::Triple::aarch64 || - fixed_arch.GetTriple().getArch() == llvm::Triple::aarch64_32 || - fixed_arch.GetTriple().getArch() == llvm::Triple::thumb) { - fixed_arch.GetTriple().setOS(llvm::Triple::IOS); - } else { - fixed_arch.GetTriple().setOS(llvm::Triple::MacOSX); - } - } - - if (module) { - std::shared_ptr ast_sp( - new ClangASTContext(fixed_arch)); - return ast_sp; - } else if (target && target->IsValid()) { - std::shared_ptr ast_sp( - new ClangASTContextForExpressions(*target, fixed_arch)); - ast_sp->m_scratch_ast_source_up.reset( - new ClangASTSource(target->shared_from_this())); - lldbassert(ast_sp->getFileManager()); - ast_sp->m_scratch_ast_source_up->InstallASTContext( - *ast_sp->getASTContext(), *ast_sp->getFileManager(), true); - llvm::IntrusiveRefCntPtr proxy_ast_source( - ast_sp->m_scratch_ast_source_up->CreateProxy()); - ast_sp->SetExternalSource(proxy_ast_source); - return ast_sp; - } - } + if (!ClangASTContextSupportsLanguage(language)) + return lldb::TypeSystemSP(); + ArchSpec arch; + if (module) + arch = module->GetArchitecture(); + else if (target) + arch = target->GetArchitecture(); + + if (!arch.IsValid()) + return lldb::TypeSystemSP(); + + ArchSpec fixed_arch = arch; + // LLVM wants this to be set to iOS or MacOSX; if we're working on + // a bare-boards type image, change the triple for llvm's benefit. 
+ if (fixed_arch.GetTriple().getVendor() == llvm::Triple::Apple && + fixed_arch.GetTriple().getOS() == llvm::Triple::UnknownOS) { + if (fixed_arch.GetTriple().getArch() == llvm::Triple::arm || + fixed_arch.GetTriple().getArch() == llvm::Triple::aarch64 || + fixed_arch.GetTriple().getArch() == llvm::Triple::aarch64_32 || + fixed_arch.GetTriple().getArch() == llvm::Triple::thumb) { + fixed_arch.GetTriple().setOS(llvm::Triple::IOS); + } else { + fixed_arch.GetTriple().setOS(llvm::Triple::MacOSX); + } + } + + if (module) { + std::shared_ptr ast_sp(new ClangASTContext(fixed_arch)); + return ast_sp; + } else if (target && target->IsValid()) { + std::shared_ptr ast_sp( + new ClangASTContextForExpressions(*target, fixed_arch)); + ast_sp->m_scratch_ast_source_up.reset( + new ClangASTSource(target->shared_from_this())); + lldbassert(ast_sp->getFileManager()); + ast_sp->m_scratch_ast_source_up->InstallASTContext( + *ast_sp->getASTContext(), *ast_sp->getFileManager(), true); + llvm::IntrusiveRefCntPtr proxy_ast_source( + ast_sp->m_scratch_ast_source_up->CreateProxy()); + ast_sp->SetExternalSource(proxy_ast_source); + return ast_sp; } return lldb::TypeSystemSP(); } From 363cbcc59040dd337f958fd0fd0584f7c6ebbd63 Mon Sep 17 00:00:00 2001 From: Bjorn Pettersson Date: Fri, 29 Nov 2019 13:24:13 +0100 Subject: [PATCH 223/591] [InstCombine] Run the cast.ll test a twice, now also testing little endian. NFC Some tests in test/Transforms/InstCombine/cast.ll depend on endianness. Added a second run line to run the tests with both big and little endian. In the past we only compiled for big endian, and then it was hard to see if any big endian bugfixes would impact the little endian result etc. 
--- llvm/test/Transforms/InstCombine/cast.ll | 993 ++++++++++++----------- 1 file changed, 501 insertions(+), 492 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll index fd35bd92dd7dc..66eb3904ebb78 100644 --- a/llvm/test/Transforms/InstCombine/cast.ll +++ b/llvm/test/Transforms/InstCombine/cast.ll @@ -1,13 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; Tests to make sure elimination of casts is working correctly -; RUN: opt < %s -instcombine -S | FileCheck %s -target datalayout = "E-p:64:64:64-p1:32:32:32-p2:64:64:64-p3:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n8:16:32:64" +; RUN: opt < %s -instcombine -S -data-layout="E-p:64:64:64-p1:32:32:32-p2:64:64:64-p3:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n8:16:32:64" | FileCheck %s --check-prefixes=ALL,BE +; RUN: opt < %s -instcombine -S -data-layout="e-p:64:64:64-p1:32:32:32-p2:64:64:64-p3:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n8:16:32:64" | FileCheck %s --check-prefixes=ALL,LE @inbuf = external global [32832 x i8] define i32 @test1(i32 %A) { -; CHECK-LABEL: @test1( -; CHECK-NEXT: ret i32 [[A:%.*]] +; ALL-LABEL: @test1( +; ALL-NEXT: ret i32 [[A:%.*]] ; %c1 = bitcast i32 %A to i32 %c2 = bitcast i32 %c1 to i32 @@ -15,9 +15,9 @@ define i32 @test1(i32 %A) { } define i64 @test2(i8 %A) { -; CHECK-LABEL: @test2( -; CHECK-NEXT: [[RET:%.*]] = zext i8 [[A:%.*]] to i64 -; CHECK-NEXT: ret i64 [[RET]] +; ALL-LABEL: @test2( +; ALL-NEXT: [[RET:%.*]] = zext i8 [[A:%.*]] to i64 +; ALL-NEXT: ret i64 [[RET]] ; %c1 = zext i8 %A to i16 %c2 = zext i16 %c1 to i32 @@ -26,9 +26,9 @@ define i64 @test2(i8 %A) { } define i64 @test3(i64 %A) { -; CHECK-LABEL: @test3( -; CHECK-NEXT: [[C2:%.*]] = and i64 [[A:%.*]], 255 -; CHECK-NEXT: ret i64 [[C2]] +; ALL-LABEL: 
@test3( +; ALL-NEXT: [[C2:%.*]] = and i64 [[A:%.*]], 255 +; ALL-NEXT: ret i64 [[C2]] ; %c1 = trunc i64 %A to i8 %c2 = zext i8 %c1 to i64 @@ -36,10 +36,10 @@ define i64 @test3(i64 %A) { } define i32 @test4(i32 %A, i32 %B) { -; CHECK-LABEL: @test4( -; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[RESULT:%.*]] = zext i1 [[COND]] to i32 -; CHECK-NEXT: ret i32 [[RESULT]] +; ALL-LABEL: @test4( +; ALL-NEXT: [[COND:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]] +; ALL-NEXT: [[RESULT:%.*]] = zext i1 [[COND]] to i32 +; ALL-NEXT: ret i32 [[RESULT]] ; %COND = icmp slt i32 %A, %B %c = zext i1 %COND to i8 @@ -48,9 +48,9 @@ define i32 @test4(i32 %A, i32 %B) { } define i32 @test5(i1 %B) { -; CHECK-LABEL: @test5( -; CHECK-NEXT: [[RESULT:%.*]] = zext i1 [[B:%.*]] to i32 -; CHECK-NEXT: ret i32 [[RESULT]] +; ALL-LABEL: @test5( +; ALL-NEXT: [[RESULT:%.*]] = zext i1 [[B:%.*]] to i32 +; ALL-NEXT: ret i32 [[RESULT]] ; %c = zext i1 %B to i8 %result = zext i8 %c to i32 @@ -58,9 +58,9 @@ define i32 @test5(i1 %B) { } define i32 @test6(i64 %A) { -; CHECK-LABEL: @test6( -; CHECK-NEXT: [[C1:%.*]] = trunc i64 [[A:%.*]] to i32 -; CHECK-NEXT: ret i32 [[C1]] +; ALL-LABEL: @test6( +; ALL-NEXT: [[C1:%.*]] = trunc i64 [[A:%.*]] to i32 +; ALL-NEXT: ret i32 [[C1]] ; %c1 = trunc i64 %A to i32 %res = bitcast i32 %c1 to i32 @@ -68,9 +68,9 @@ define i32 @test6(i64 %A) { } define i64 @test7(i1 %A) { -; CHECK-LABEL: @test7( -; CHECK-NEXT: [[RES:%.*]] = zext i1 [[A:%.*]] to i64 -; CHECK-NEXT: ret i64 [[RES]] +; ALL-LABEL: @test7( +; ALL-NEXT: [[RES:%.*]] = zext i1 [[A:%.*]] to i64 +; ALL-NEXT: ret i64 [[RES]] ; %c1 = zext i1 %A to i32 %res = sext i32 %c1 to i64 @@ -78,9 +78,9 @@ define i64 @test7(i1 %A) { } define i64 @test8(i8 %A) { -; CHECK-LABEL: @test8( -; CHECK-NEXT: [[C1:%.*]] = sext i8 [[A:%.*]] to i64 -; CHECK-NEXT: ret i64 [[C1]] +; ALL-LABEL: @test8( +; ALL-NEXT: [[C1:%.*]] = sext i8 [[A:%.*]] to i64 +; ALL-NEXT: ret i64 [[C1]] ; %c1 = sext i8 %A to i64 %res = bitcast i64 
%c1 to i64 @@ -88,8 +88,8 @@ define i64 @test8(i8 %A) { } define i16 @test9(i16 %A) { -; CHECK-LABEL: @test9( -; CHECK-NEXT: ret i16 [[A:%.*]] +; ALL-LABEL: @test9( +; ALL-NEXT: ret i16 [[A:%.*]] ; %c1 = sext i16 %A to i32 %c2 = trunc i32 %c1 to i16 @@ -97,8 +97,8 @@ define i16 @test9(i16 %A) { } define i16 @test10(i16 %A) { -; CHECK-LABEL: @test10( -; CHECK-NEXT: ret i16 [[A:%.*]] +; ALL-LABEL: @test10( +; ALL-NEXT: ret i16 [[A:%.*]] ; %c1 = sext i16 %A to i32 %c2 = trunc i32 %c1 to i16 @@ -108,9 +108,9 @@ define i16 @test10(i16 %A) { declare void @varargs(i32, ...) define void @test11(i32* %P) { -; CHECK-LABEL: @test11( -; CHECK-NEXT: call void (i32, ...) @varargs(i32 5, i32* [[P:%.*]]) -; CHECK-NEXT: ret void +; ALL-LABEL: @test11( +; ALL-NEXT: call void (i32, ...) @varargs(i32 5, i32* [[P:%.*]]) +; ALL-NEXT: ret void ; %c = bitcast i32* %P to i16* call void (i32, ...) @varargs( i32 5, i16* %c ) @@ -119,16 +119,16 @@ define void @test11(i32* %P) { declare i32 @__gxx_personality_v0(...) define void @test_invoke_vararg_cast(i32* %a, i32* %b) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -; CHECK-LABEL: @test_invoke_vararg_cast( -; CHECK-NEXT: entry: -; CHECK-NEXT: invoke void (i32, ...) @varargs(i32 1, i32* [[B:%.*]], i32* [[A:%.*]]) -; CHECK-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]] -; CHECK: invoke.cont: -; CHECK-NEXT: ret void -; CHECK: lpad: -; CHECK-NEXT: [[TMP0:%.*]] = landingpad { i8*, i32 } -; CHECK-NEXT: cleanup -; CHECK-NEXT: ret void +; ALL-LABEL: @test_invoke_vararg_cast( +; ALL-NEXT: entry: +; ALL-NEXT: invoke void (i32, ...) 
@varargs(i32 1, i32* [[B:%.*]], i32* [[A:%.*]]) +; ALL-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]] +; ALL: invoke.cont: +; ALL-NEXT: ret void +; ALL: lpad: +; ALL-NEXT: [[TMP0:%.*]] = landingpad { i8*, i32 } +; ALL-NEXT: cleanup +; ALL-NEXT: ret void ; entry: %0 = bitcast i32* %b to i8* @@ -146,18 +146,18 @@ lpad: } define i8* @test13(i64 %A) { -; CHECK-LABEL: @test13( -; CHECK-NEXT: [[C:%.*]] = getelementptr [32832 x i8], [32832 x i8]* @inbuf, i64 0, i64 [[A:%.*]] -; CHECK-NEXT: ret i8* [[C]] +; ALL-LABEL: @test13( +; ALL-NEXT: [[C:%.*]] = getelementptr [32832 x i8], [32832 x i8]* @inbuf, i64 0, i64 [[A:%.*]] +; ALL-NEXT: ret i8* [[C]] ; %c = getelementptr [0 x i8], [0 x i8]* bitcast ([32832 x i8]* @inbuf to [0 x i8]*), i64 0, i64 %A ret i8* %c } define i1 @test14(i8 %A) { -; CHECK-LABEL: @test14( -; CHECK-NEXT: [[X:%.*]] = icmp sgt i8 [[A:%.*]], -1 -; CHECK-NEXT: ret i1 [[X]] +; ALL-LABEL: @test14( +; ALL-NEXT: [[X:%.*]] = icmp sgt i8 [[A:%.*]], -1 +; ALL-NEXT: ret i1 [[X]] ; %c = bitcast i8 %A to i8 %X = icmp ult i8 %c, -128 @@ -173,18 +173,18 @@ define i1 @test14(i8 %A) { ;} define i1 @test16(i32* %P) { -; CHECK-LABEL: @test16( -; CHECK-NEXT: [[C:%.*]] = icmp ne i32* [[P:%.*]], null -; CHECK-NEXT: ret i1 [[C]] +; ALL-LABEL: @test16( +; ALL-NEXT: [[C:%.*]] = icmp ne i32* [[P:%.*]], null +; ALL-NEXT: ret i1 [[C]] ; %c = icmp ne i32* %P, null ret i1 %c } define i16 @test17(i1 %x) { -; CHECK-LABEL: @test17( -; CHECK-NEXT: [[T86:%.*]] = zext i1 [[X:%.*]] to i16 -; CHECK-NEXT: ret i16 [[T86]] +; ALL-LABEL: @test17( +; ALL-NEXT: [[T86:%.*]] = zext i1 [[X:%.*]] to i16 +; ALL-NEXT: ret i16 [[T86]] ; %c = zext i1 %x to i32 %t86 = trunc i32 %c to i16 @@ -192,9 +192,9 @@ define i16 @test17(i1 %x) { } define i16 @test18(i8 %x) { -; CHECK-LABEL: @test18( -; CHECK-NEXT: [[T86:%.*]] = sext i8 [[X:%.*]] to i16 -; CHECK-NEXT: ret i16 [[T86]] +; ALL-LABEL: @test18( +; ALL-NEXT: [[T86:%.*]] = sext i8 [[X:%.*]] to i16 +; ALL-NEXT: ret i16 [[T86]] ; %c = sext i8 %x 
to i32 %t86 = trunc i32 %c to i16 @@ -202,9 +202,9 @@ define i16 @test18(i8 %x) { } define i1 @test19(i32 %X) { -; CHECK-LABEL: @test19( -; CHECK-NEXT: [[Z:%.*]] = icmp slt i32 [[X:%.*]], 12345 -; CHECK-NEXT: ret i1 [[Z]] +; ALL-LABEL: @test19( +; ALL-NEXT: [[Z:%.*]] = icmp slt i32 [[X:%.*]], 12345 +; ALL-NEXT: ret i1 [[Z]] ; %c = sext i32 %X to i64 %Z = icmp slt i64 %c, 12345 @@ -212,9 +212,9 @@ define i1 @test19(i32 %X) { } define <2 x i1> @test19vec(<2 x i32> %X) { -; CHECK-LABEL: @test19vec( -; CHECK-NEXT: [[Z:%.*]] = icmp slt <2 x i32> [[X:%.*]], -; CHECK-NEXT: ret <2 x i1> [[Z]] +; ALL-LABEL: @test19vec( +; ALL-NEXT: [[Z:%.*]] = icmp slt <2 x i32> [[X:%.*]], +; ALL-NEXT: ret <2 x i1> [[Z]] ; %c = sext <2 x i32> %X to <2 x i64> %Z = icmp slt <2 x i64> %c, @@ -222,9 +222,9 @@ define <2 x i1> @test19vec(<2 x i32> %X) { } define <3 x i1> @test19vec2(<3 x i1> %X) { -; CHECK-LABEL: @test19vec2( -; CHECK-NEXT: [[CMPEQ:%.*]] = xor <3 x i1> [[X:%.*]], -; CHECK-NEXT: ret <3 x i1> [[CMPEQ]] +; ALL-LABEL: @test19vec2( +; ALL-NEXT: [[CMPEQ:%.*]] = xor <3 x i1> [[X:%.*]], +; ALL-NEXT: ret <3 x i1> [[CMPEQ]] ; %sext = sext <3 x i1> %X to <3 x i32> %cmpeq = icmp eq <3 x i32> %sext, zeroinitializer @@ -232,8 +232,8 @@ define <3 x i1> @test19vec2(<3 x i1> %X) { } define i1 @test20(i1 %B) { -; CHECK-LABEL: @test20( -; CHECK-NEXT: ret i1 false +; ALL-LABEL: @test20( +; ALL-NEXT: ret i1 false ; %c = zext i1 %B to i32 %D = icmp slt i32 %c, -1 @@ -241,9 +241,9 @@ define i1 @test20(i1 %B) { } define i32 @test21(i32 %X) { -; CHECK-LABEL: @test21( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 255 -; CHECK-NEXT: ret i32 [[TMP1]] +; ALL-LABEL: @test21( +; ALL-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 255 +; ALL-NEXT: ret i32 [[TMP1]] ; %c1 = trunc i32 %X to i8 %c2 = sext i8 %c1 to i32 @@ -252,9 +252,9 @@ define i32 @test21(i32 %X) { } define i32 @test22(i32 %X) { -; CHECK-LABEL: @test22( -; CHECK-NEXT: [[SEXT:%.*]] = shl i32 [[X:%.*]], 24 -; CHECK-NEXT: ret i32 [[SEXT]] +; ALL-LABEL: 
@test22( +; ALL-NEXT: [[SEXT:%.*]] = shl i32 [[X:%.*]], 24 +; ALL-NEXT: ret i32 [[SEXT]] ; %c1 = trunc i32 %X to i8 %c2 = sext i8 %c1 to i32 @@ -263,9 +263,9 @@ define i32 @test22(i32 %X) { } define i32 @test23(i32 %X) { -; CHECK-LABEL: @test23( -; CHECK-NEXT: [[C2:%.*]] = and i32 [[X:%.*]], 65535 -; CHECK-NEXT: ret i32 [[C2]] +; ALL-LABEL: @test23( +; ALL-NEXT: [[C2:%.*]] = and i32 [[X:%.*]], 65535 +; ALL-NEXT: ret i32 [[C2]] ; %c1 = trunc i32 %X to i16 %c2 = zext i16 %c1 to i32 @@ -273,8 +273,8 @@ define i32 @test23(i32 %X) { } define i1 @test24(i1 %C) { -; CHECK-LABEL: @test24( -; CHECK-NEXT: ret i1 true +; ALL-LABEL: @test24( +; ALL-NEXT: ret i1 true ; %X = select i1 %C, i32 14, i32 1234 %c = icmp ne i32 %X, 0 @@ -282,9 +282,9 @@ define i1 @test24(i1 %C) { } define i32 @test26(float %F) { -; CHECK-LABEL: @test26( -; CHECK-NEXT: [[D:%.*]] = fptosi float [[F:%.*]] to i32 -; CHECK-NEXT: ret i32 [[D]] +; ALL-LABEL: @test26( +; ALL-NEXT: [[D:%.*]] = fptosi float [[F:%.*]] to i32 +; ALL-NEXT: ret i32 [[D]] ; %c = fpext float %F to double %D = fptosi double %c to i32 @@ -292,28 +292,28 @@ define i32 @test26(float %F) { } define [4 x float]* @test27([9 x [4 x float]]* %A) { -; CHECK-LABEL: @test27( -; CHECK-NEXT: [[C:%.*]] = getelementptr [9 x [4 x float]], [9 x [4 x float]]* [[A:%.*]], i64 0, i64 0 -; CHECK-NEXT: ret [4 x float]* [[C]] +; ALL-LABEL: @test27( +; ALL-NEXT: [[C:%.*]] = getelementptr [9 x [4 x float]], [9 x [4 x float]]* [[A:%.*]], i64 0, i64 0 +; ALL-NEXT: ret [4 x float]* [[C]] ; %c = bitcast [9 x [4 x float]]* %A to [4 x float]* ret [4 x float]* %c } define float* @test28([4 x float]* %A) { -; CHECK-LABEL: @test28( -; CHECK-NEXT: [[C:%.*]] = getelementptr [4 x float], [4 x float]* [[A:%.*]], i64 0, i64 0 -; CHECK-NEXT: ret float* [[C]] +; ALL-LABEL: @test28( +; ALL-NEXT: [[C:%.*]] = getelementptr [4 x float], [4 x float]* [[A:%.*]], i64 0, i64 0 +; ALL-NEXT: ret float* [[C]] ; %c = bitcast [4 x float]* %A to float* ret float* %c } define i32 
@test29(i32 %c1, i32 %c2) { -; CHECK-LABEL: @test29( -; CHECK-NEXT: [[T21:%.*]] = or i32 [[C2:%.*]], [[C1:%.*]] -; CHECK-NEXT: [[T10:%.*]] = and i32 [[T21]], 255 -; CHECK-NEXT: ret i32 [[T10]] +; ALL-LABEL: @test29( +; ALL-NEXT: [[T21:%.*]] = or i32 [[C2:%.*]], [[C1:%.*]] +; ALL-NEXT: [[T10:%.*]] = and i32 [[T21]], 255 +; ALL-NEXT: ret i32 [[T10]] ; %t1 = trunc i32 %c1 to i8 %tmask = trunc i32 %c2 to i8 @@ -323,10 +323,10 @@ define i32 @test29(i32 %c1, i32 %c2) { } define i32 @test30(i32 %c1) { -; CHECK-LABEL: @test30( -; CHECK-NEXT: [[C3:%.*]] = and i32 [[C1:%.*]], 255 -; CHECK-NEXT: [[C4:%.*]] = xor i32 [[C3]], 1 -; CHECK-NEXT: ret i32 [[C4]] +; ALL-LABEL: @test30( +; ALL-NEXT: [[C3:%.*]] = and i32 [[C1:%.*]], 255 +; ALL-NEXT: [[C4:%.*]] = xor i32 [[C3]], 1 +; ALL-NEXT: ret i32 [[C4]] ; %c2 = trunc i32 %c1 to i8 %c3 = xor i8 %c2, 1 @@ -335,10 +335,10 @@ define i32 @test30(i32 %c1) { } define i1 @test31(i64 %A) { -; CHECK-LABEL: @test31( -; CHECK-NEXT: [[C1:%.*]] = and i64 [[A:%.*]], 42 -; CHECK-NEXT: [[D:%.*]] = icmp eq i64 [[C1]], 10 -; CHECK-NEXT: ret i1 [[D]] +; ALL-LABEL: @test31( +; ALL-NEXT: [[C1:%.*]] = and i64 [[A:%.*]], 42 +; ALL-NEXT: [[D:%.*]] = icmp eq i64 [[C1]], 10 +; ALL-NEXT: ret i1 [[D]] ; %B = trunc i64 %A to i32 %C = and i32 %B, 42 @@ -349,11 +349,11 @@ define i1 @test31(i64 %A) { ; FIXME: Vectors should fold too...or not? ; Does this depend on the whether the source/dest types of the trunc are legal in the data layout? 
define <2 x i1> @test31vec(<2 x i64> %A) { -; CHECK-LABEL: @test31vec( -; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[C:%.*]] = and <2 x i32> [[B]], -; CHECK-NEXT: [[D:%.*]] = icmp eq <2 x i32> [[C]], -; CHECK-NEXT: ret <2 x i1> [[D]] +; ALL-LABEL: @test31vec( +; ALL-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> +; ALL-NEXT: [[C:%.*]] = and <2 x i32> [[B]], +; ALL-NEXT: [[D:%.*]] = icmp eq <2 x i32> [[C]], +; ALL-NEXT: ret <2 x i1> [[D]] ; %B = trunc <2 x i64> %A to <2 x i32> %C = and <2 x i32> %B, @@ -365,10 +365,10 @@ define <2 x i1> @test31vec(<2 x i64> %A) { ; even for vectors. Earlier folds should ensure that the icmp(and(zext)) pattern never occurs. define <2 x i1> @test32vec(<2 x i8> %A) { -; CHECK-LABEL: @test32vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[D:%.*]] = icmp eq <2 x i8> [[TMP1]], -; CHECK-NEXT: ret <2 x i1> [[D]] +; ALL-LABEL: @test32vec( +; ALL-NEXT: [[TMP1:%.*]] = and <2 x i8> [[A:%.*]], +; ALL-NEXT: [[D:%.*]] = icmp eq <2 x i8> [[TMP1]], +; ALL-NEXT: ret <2 x i1> [[D]] ; %B = zext <2 x i8> %A to <2 x i16> %C = and <2 x i16> %B, @@ -377,8 +377,8 @@ define <2 x i1> @test32vec(<2 x i8> %A) { } define i32 @test33(i32 %c1) { -; CHECK-LABEL: @test33( -; CHECK-NEXT: ret i32 [[C1:%.*]] +; ALL-LABEL: @test33( +; ALL-NEXT: ret i32 [[C1:%.*]] ; %x = bitcast i32 %c1 to float %y = bitcast float %x to i32 @@ -386,9 +386,9 @@ define i32 @test33(i32 %c1) { } define i16 @test34(i16 %a) { -; CHECK-LABEL: @test34( -; CHECK-NEXT: [[TMP1:%.*]] = lshr i16 [[A:%.*]], 8 -; CHECK-NEXT: ret i16 [[TMP1]] +; ALL-LABEL: @test34( +; ALL-NEXT: [[TMP1:%.*]] = lshr i16 [[A:%.*]], 8 +; ALL-NEXT: ret i16 [[TMP1]] ; %c1 = zext i16 %a to i32 %t21 = lshr i32 %c1, 8 @@ -397,9 +397,9 @@ define i16 @test34(i16 %a) { } define i16 @test35(i16 %a) { -; CHECK-LABEL: @test35( -; CHECK-NEXT: [[T2:%.*]] = lshr i16 [[A:%.*]], 8 -; CHECK-NEXT: ret i16 [[T2]] +; ALL-LABEL: @test35( +; ALL-NEXT: [[T2:%.*]] = lshr i16 
[[A:%.*]], 8 +; ALL-NEXT: ret i16 [[T2]] ; %c1 = bitcast i16 %a to i16 %t2 = lshr i16 %c1, 8 @@ -409,9 +409,9 @@ define i16 @test35(i16 %a) { ; rdar://6480391 define i1 @test36(i32 %a) { -; CHECK-LABEL: @test36( -; CHECK-NEXT: [[D:%.*]] = icmp sgt i32 [[A:%.*]], -1 -; CHECK-NEXT: ret i1 [[D]] +; ALL-LABEL: @test36( +; ALL-NEXT: [[D:%.*]] = icmp sgt i32 [[A:%.*]], -1 +; ALL-NEXT: ret i1 [[D]] ; %b = lshr i32 %a, 31 %c = trunc i32 %b to i8 @@ -420,9 +420,9 @@ define i1 @test36(i32 %a) { } define <2 x i1> @test36vec(<2 x i32> %a) { -; CHECK-LABEL: @test36vec( -; CHECK-NEXT: [[D:%.*]] = icmp sgt <2 x i32> [[A:%.*]], -; CHECK-NEXT: ret <2 x i1> [[D]] +; ALL-LABEL: @test36vec( +; ALL-NEXT: [[D:%.*]] = icmp sgt <2 x i32> [[A:%.*]], +; ALL-NEXT: ret <2 x i1> [[D]] ; %b = lshr <2 x i32> %a, %c = trunc <2 x i32> %b to <2 x i8> @@ -431,8 +431,8 @@ define <2 x i1> @test36vec(<2 x i32> %a) { } define i1 @test37(i32 %a) { -; CHECK-LABEL: @test37( -; CHECK-NEXT: ret i1 false +; ALL-LABEL: @test37( +; ALL-NEXT: ret i1 false ; %b = lshr i32 %a, 31 %c = or i32 %b, 512 @@ -442,10 +442,10 @@ define i1 @test37(i32 %a) { } define i64 @test38(i32 %a) { -; CHECK-LABEL: @test38( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[A:%.*]], -2 -; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i64 -; CHECK-NEXT: ret i64 [[TMP2]] +; ALL-LABEL: @test38( +; ALL-NEXT: [[TMP1:%.*]] = icmp ne i32 [[A:%.*]], -2 +; ALL-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i64 +; ALL-NEXT: ret i64 [[TMP2]] ; %1 = icmp eq i32 %a, -2 %2 = zext i1 %1 to i8 @@ -455,9 +455,9 @@ define i64 @test38(i32 %a) { } define i16 @test39(i16 %a) { -; CHECK-LABEL: @test39( -; CHECK-NEXT: [[REV:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]]) -; CHECK-NEXT: ret i16 [[REV]] +; ALL-LABEL: @test39( +; ALL-NEXT: [[REV:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]]) +; ALL-NEXT: ret i16 [[REV]] ; %t = zext i16 %a to i32 %t21 = lshr i32 %t, 8 @@ -468,11 +468,11 @@ define i16 @test39(i16 %a) { } define i16 @test40(i16 %a) { -; CHECK-LABEL: 
@test40( -; CHECK-NEXT: [[T21:%.*]] = lshr i16 [[A:%.*]], 9 -; CHECK-NEXT: [[T5:%.*]] = shl i16 [[A]], 8 -; CHECK-NEXT: [[T32:%.*]] = or i16 [[T21]], [[T5]] -; CHECK-NEXT: ret i16 [[T32]] +; ALL-LABEL: @test40( +; ALL-NEXT: [[T21:%.*]] = lshr i16 [[A:%.*]], 9 +; ALL-NEXT: [[T5:%.*]] = shl i16 [[A]], 8 +; ALL-NEXT: [[T32:%.*]] = or i16 [[T21]], [[T5]] +; ALL-NEXT: ret i16 [[T32]] ; %t = zext i16 %a to i32 %t21 = lshr i32 %t, 9 @@ -483,11 +483,11 @@ define i16 @test40(i16 %a) { } define <2 x i16> @test40vec(<2 x i16> %a) { -; CHECK-LABEL: @test40vec( -; CHECK-NEXT: [[T21:%.*]] = lshr <2 x i16> [[A:%.*]], -; CHECK-NEXT: [[T5:%.*]] = shl <2 x i16> [[A]], -; CHECK-NEXT: [[T32:%.*]] = or <2 x i16> [[T21]], [[T5]] -; CHECK-NEXT: ret <2 x i16> [[T32]] +; ALL-LABEL: @test40vec( +; ALL-NEXT: [[T21:%.*]] = lshr <2 x i16> [[A:%.*]], +; ALL-NEXT: [[T5:%.*]] = shl <2 x i16> [[A]], +; ALL-NEXT: [[T32:%.*]] = or <2 x i16> [[T21]], [[T5]] +; ALL-NEXT: ret <2 x i16> [[T32]] ; %t = zext <2 x i16> %a to <2 x i32> %t21 = lshr <2 x i32> %t, @@ -499,8 +499,8 @@ define <2 x i16> @test40vec(<2 x i16> %a) { ; PR1263 define i32* @test41(i32* %t1) { -; CHECK-LABEL: @test41( -; CHECK-NEXT: ret i32* [[T1:%.*]] +; ALL-LABEL: @test41( +; ALL-NEXT: ret i32* [[T1:%.*]] ; %t64 = bitcast i32* %t1 to { i32 }* %t65 = getelementptr { i32 }, { i32 }* %t64, i32 0, i32 0 @@ -508,9 +508,9 @@ define i32* @test41(i32* %t1) { } define i32 addrspace(1)* @test41_addrspacecast_smaller(i32* %t1) { -; CHECK-LABEL: @test41_addrspacecast_smaller( -; CHECK-NEXT: [[T65:%.*]] = addrspacecast i32* [[T1:%.*]] to i32 addrspace(1)* -; CHECK-NEXT: ret i32 addrspace(1)* [[T65]] +; ALL-LABEL: @test41_addrspacecast_smaller( +; ALL-NEXT: [[T65:%.*]] = addrspacecast i32* [[T1:%.*]] to i32 addrspace(1)* +; ALL-NEXT: ret i32 addrspace(1)* [[T65]] ; %t64 = addrspacecast i32* %t1 to { i32 } addrspace(1)* %t65 = getelementptr { i32 }, { i32 } addrspace(1)* %t64, i32 0, i32 0 @@ -518,9 +518,9 @@ define i32 addrspace(1)* 
@test41_addrspacecast_smaller(i32* %t1) { } define i32* @test41_addrspacecast_larger(i32 addrspace(1)* %t1) { -; CHECK-LABEL: @test41_addrspacecast_larger( -; CHECK-NEXT: [[T65:%.*]] = addrspacecast i32 addrspace(1)* [[T1:%.*]] to i32* -; CHECK-NEXT: ret i32* [[T65]] +; ALL-LABEL: @test41_addrspacecast_larger( +; ALL-NEXT: [[T65:%.*]] = addrspacecast i32 addrspace(1)* [[T1:%.*]] to i32* +; ALL-NEXT: ret i32* [[T65]] ; %t64 = addrspacecast i32 addrspace(1)* %t1 to { i32 }* %t65 = getelementptr { i32 }, { i32 }* %t64, i32 0, i32 0 @@ -528,9 +528,9 @@ define i32* @test41_addrspacecast_larger(i32 addrspace(1)* %t1) { } define i32 @test42(i32 %X) { -; CHECK-LABEL: @test42( -; CHECK-NEXT: [[Z:%.*]] = and i32 [[X:%.*]], 255 -; CHECK-NEXT: ret i32 [[Z]] +; ALL-LABEL: @test42( +; ALL-NEXT: [[Z:%.*]] = and i32 [[X:%.*]], 255 +; ALL-NEXT: ret i32 [[Z]] ; %Y = trunc i32 %X to i8 %Z = zext i8 %Y to i32 @@ -539,10 +539,10 @@ define i32 @test42(i32 %X) { ; rdar://6598839 define zeroext i64 @test43(i8 zeroext %on_off) { -; CHECK-LABEL: @test43( -; CHECK-NEXT: [[A:%.*]] = zext i8 [[ON_OFF:%.*]] to i64 -; CHECK-NEXT: [[B:%.*]] = add nsw i64 [[A]], -1 -; CHECK-NEXT: ret i64 [[B]] +; ALL-LABEL: @test43( +; ALL-NEXT: [[A:%.*]] = zext i8 [[ON_OFF:%.*]] to i64 +; ALL-NEXT: [[B:%.*]] = add nsw i64 [[A]], -1 +; ALL-NEXT: ret i64 [[B]] ; %A = zext i8 %on_off to i32 %B = add i32 %A, -1 @@ -551,10 +551,10 @@ define zeroext i64 @test43(i8 zeroext %on_off) { } define i64 @test44(i8 %T) { -; CHECK-LABEL: @test44( -; CHECK-NEXT: [[A:%.*]] = zext i8 [[T:%.*]] to i64 -; CHECK-NEXT: [[B:%.*]] = or i64 [[A]], 1234 -; CHECK-NEXT: ret i64 [[B]] +; ALL-LABEL: @test44( +; ALL-NEXT: [[A:%.*]] = zext i8 [[T:%.*]] to i64 +; ALL-NEXT: [[B:%.*]] = or i64 [[A]], 1234 +; ALL-NEXT: ret i64 [[B]] ; %A = zext i8 %T to i16 %B = or i16 %A, 1234 @@ -563,11 +563,11 @@ define i64 @test44(i8 %T) { } define i64 @test45(i8 %A, i64 %Q) { -; CHECK-LABEL: @test45( -; CHECK-NEXT: [[B:%.*]] = sext i8 [[A:%.*]] to i64 -; 
CHECK-NEXT: [[C:%.*]] = or i64 [[B]], [[Q:%.*]] -; CHECK-NEXT: [[E:%.*]] = and i64 [[C]], 4294967295 -; CHECK-NEXT: ret i64 [[E]] +; ALL-LABEL: @test45( +; ALL-NEXT: [[B:%.*]] = sext i8 [[A:%.*]] to i64 +; ALL-NEXT: [[C:%.*]] = or i64 [[B]], [[Q:%.*]] +; ALL-NEXT: [[E:%.*]] = and i64 [[C]], 4294967295 +; ALL-NEXT: ret i64 [[E]] ; %D = trunc i64 %Q to i32 ;; should be removed %B = sext i8 %A to i32 @@ -578,10 +578,10 @@ define i64 @test45(i8 %A, i64 %Q) { define i64 @test46(i64 %A) { -; CHECK-LABEL: @test46( -; CHECK-NEXT: [[C:%.*]] = shl i64 [[A:%.*]], 8 -; CHECK-NEXT: [[D:%.*]] = and i64 [[C]], 10752 -; CHECK-NEXT: ret i64 [[D]] +; ALL-LABEL: @test46( +; ALL-NEXT: [[C:%.*]] = shl i64 [[A:%.*]], 8 +; ALL-NEXT: [[D:%.*]] = and i64 [[C]], 10752 +; ALL-NEXT: ret i64 [[D]] ; %B = trunc i64 %A to i32 %C = and i32 %B, 42 @@ -591,12 +591,12 @@ define i64 @test46(i64 %A) { } define <2 x i64> @test46vec(<2 x i64> %A) { -; CHECK-LABEL: @test46vec( -; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[C:%.*]] = shl <2 x i32> [[B]], -; CHECK-NEXT: [[D:%.*]] = and <2 x i32> [[C]], -; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[D]] to <2 x i64> -; CHECK-NEXT: ret <2 x i64> [[E]] +; ALL-LABEL: @test46vec( +; ALL-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> +; ALL-NEXT: [[C:%.*]] = shl <2 x i32> [[B]], +; ALL-NEXT: [[D:%.*]] = and <2 x i32> [[C]], +; ALL-NEXT: [[E:%.*]] = zext <2 x i32> [[D]] to <2 x i64> +; ALL-NEXT: ret <2 x i64> [[E]] ; %B = trunc <2 x i64> %A to <2 x i32> %C = and <2 x i32> %B, @@ -606,11 +606,11 @@ define <2 x i64> @test46vec(<2 x i64> %A) { } define i64 @test47(i8 %A) { -; CHECK-LABEL: @test47( -; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[A:%.*]], 42 -; CHECK-NEXT: [[C:%.*]] = sext i8 [[TMP1]] to i64 -; CHECK-NEXT: [[E:%.*]] = and i64 [[C]], 4294967295 -; CHECK-NEXT: ret i64 [[E]] +; ALL-LABEL: @test47( +; ALL-NEXT: [[TMP1:%.*]] = or i8 [[A:%.*]], 42 +; ALL-NEXT: [[C:%.*]] = sext i8 [[TMP1]] to i64 +; ALL-NEXT: [[E:%.*]] = and 
i64 [[C]], 4294967295 +; ALL-NEXT: ret i64 [[E]] ; %B = sext i8 %A to i32 %C = or i32 %B, 42 @@ -619,12 +619,12 @@ define i64 @test47(i8 %A) { } define i64 @test48(i8 %A1, i8 %a2) { -; CHECK-LABEL: @test48( -; CHECK-NEXT: [[Z2:%.*]] = zext i8 [[A1:%.*]] to i32 -; CHECK-NEXT: [[C:%.*]] = shl nuw nsw i32 [[Z2]], 8 -; CHECK-NEXT: [[D:%.*]] = or i32 [[C]], [[Z2]] -; CHECK-NEXT: [[E:%.*]] = zext i32 [[D]] to i64 -; CHECK-NEXT: ret i64 [[E]] +; ALL-LABEL: @test48( +; ALL-NEXT: [[Z2:%.*]] = zext i8 [[A1:%.*]] to i32 +; ALL-NEXT: [[C:%.*]] = shl nuw nsw i32 [[Z2]], 8 +; ALL-NEXT: [[D:%.*]] = or i32 [[C]], [[Z2]] +; ALL-NEXT: [[E:%.*]] = zext i32 [[D]] to i64 +; ALL-NEXT: ret i64 [[E]] ; %Z1 = zext i8 %a2 to i32 %Z2 = zext i8 %A1 to i32 @@ -635,11 +635,11 @@ define i64 @test48(i8 %A1, i8 %a2) { } define i64 @test49(i64 %A) { -; CHECK-LABEL: @test49( -; CHECK-NEXT: [[C:%.*]] = shl i64 [[A:%.*]], 32 -; CHECK-NEXT: [[SEXT:%.*]] = ashr exact i64 [[C]], 32 -; CHECK-NEXT: [[D:%.*]] = or i64 [[SEXT]], 1 -; CHECK-NEXT: ret i64 [[D]] +; ALL-LABEL: @test49( +; ALL-NEXT: [[C:%.*]] = shl i64 [[A:%.*]], 32 +; ALL-NEXT: [[SEXT:%.*]] = ashr exact i64 [[C]], 32 +; ALL-NEXT: [[D:%.*]] = or i64 [[SEXT]], 1 +; ALL-NEXT: ret i64 [[D]] ; %B = trunc i64 %A to i32 %C = or i32 %B, 1 @@ -648,12 +648,12 @@ define i64 @test49(i64 %A) { } define i64 @test50(i64 %x) { -; CHECK-LABEL: @test50( -; CHECK-NEXT: [[A:%.*]] = lshr i64 [[X:%.*]], 2 -; CHECK-NEXT: [[D:%.*]] = shl i64 [[A]], 32 -; CHECK-NEXT: [[SEXT:%.*]] = add i64 [[D]], -4294967296 -; CHECK-NEXT: [[E:%.*]] = ashr exact i64 [[SEXT]], 32 -; CHECK-NEXT: ret i64 [[E]] +; ALL-LABEL: @test50( +; ALL-NEXT: [[A:%.*]] = lshr i64 [[X:%.*]], 2 +; ALL-NEXT: [[D:%.*]] = shl i64 [[A]], 32 +; ALL-NEXT: [[SEXT:%.*]] = add i64 [[D]], -4294967296 +; ALL-NEXT: [[E:%.*]] = ashr exact i64 [[SEXT]], 32 +; ALL-NEXT: ret i64 [[E]] ; %a = lshr i64 %x, 2 %B = trunc i64 %a to i32 @@ -664,13 +664,13 @@ define i64 @test50(i64 %x) { } define i64 @test51(i64 %A, i1 %cond) { 
-; CHECK-LABEL: @test51( -; CHECK-NEXT: [[C:%.*]] = and i64 [[A:%.*]], 4294967294 -; CHECK-NEXT: [[D:%.*]] = or i64 [[A]], 1 -; CHECK-NEXT: [[E:%.*]] = select i1 [[COND:%.*]], i64 [[C]], i64 [[D]] -; CHECK-NEXT: [[SEXT:%.*]] = shl i64 [[E]], 32 -; CHECK-NEXT: [[F:%.*]] = ashr exact i64 [[SEXT]], 32 -; CHECK-NEXT: ret i64 [[F]] +; ALL-LABEL: @test51( +; ALL-NEXT: [[C:%.*]] = and i64 [[A:%.*]], 4294967294 +; ALL-NEXT: [[D:%.*]] = or i64 [[A]], 1 +; ALL-NEXT: [[E:%.*]] = select i1 [[COND:%.*]], i64 [[C]], i64 [[D]] +; ALL-NEXT: [[SEXT:%.*]] = shl i64 [[E]], 32 +; ALL-NEXT: [[F:%.*]] = ashr exact i64 [[SEXT]], 32 +; ALL-NEXT: ret i64 [[F]] ; %B = trunc i64 %A to i32 %C = and i32 %B, -2 @@ -681,11 +681,11 @@ define i64 @test51(i64 %A, i1 %cond) { } define i32 @test52(i64 %A) { -; CHECK-LABEL: @test52( -; CHECK-NEXT: [[B:%.*]] = trunc i64 [[A:%.*]] to i32 -; CHECK-NEXT: [[C:%.*]] = and i32 [[B]], 7224 -; CHECK-NEXT: [[D:%.*]] = or i32 [[C]], 32962 -; CHECK-NEXT: ret i32 [[D]] +; ALL-LABEL: @test52( +; ALL-NEXT: [[B:%.*]] = trunc i64 [[A:%.*]] to i32 +; ALL-NEXT: [[C:%.*]] = and i32 [[B]], 7224 +; ALL-NEXT: [[D:%.*]] = or i32 [[C]], 32962 +; ALL-NEXT: ret i32 [[D]] ; %B = trunc i64 %A to i16 %C = or i16 %B, -32574 @@ -695,11 +695,11 @@ define i32 @test52(i64 %A) { } define i64 @test53(i32 %A) { -; CHECK-LABEL: @test53( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 7224 -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 32962 -; CHECK-NEXT: [[D:%.*]] = zext i32 [[TMP2]] to i64 -; CHECK-NEXT: ret i64 [[D]] +; ALL-LABEL: @test53( +; ALL-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 7224 +; ALL-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 32962 +; ALL-NEXT: [[D:%.*]] = zext i32 [[TMP2]] to i64 +; ALL-NEXT: ret i64 [[D]] ; %B = trunc i32 %A to i16 %C = or i16 %B, -32574 @@ -709,11 +709,11 @@ define i64 @test53(i32 %A) { } define i32 @test54(i64 %A) { -; CHECK-LABEL: @test54( -; CHECK-NEXT: [[B:%.*]] = trunc i64 [[A:%.*]] to i32 -; CHECK-NEXT: [[C:%.*]] = and i32 [[B]], 7224 -; CHECK-NEXT: 
[[D:%.*]] = or i32 [[C]], -32574 -; CHECK-NEXT: ret i32 [[D]] +; ALL-LABEL: @test54( +; ALL-NEXT: [[B:%.*]] = trunc i64 [[A:%.*]] to i32 +; ALL-NEXT: [[C:%.*]] = and i32 [[B]], 7224 +; ALL-NEXT: [[D:%.*]] = or i32 [[C]], -32574 +; ALL-NEXT: ret i32 [[D]] ; %B = trunc i64 %A to i16 %C = or i16 %B, -32574 @@ -723,11 +723,11 @@ define i32 @test54(i64 %A) { } define i64 @test55(i32 %A) { -; CHECK-LABEL: @test55( -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 7224 -; CHECK-NEXT: [[C:%.*]] = zext i32 [[TMP1]] to i64 -; CHECK-NEXT: [[D:%.*]] = or i64 [[C]], -32574 -; CHECK-NEXT: ret i64 [[D]] +; ALL-LABEL: @test55( +; ALL-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 7224 +; ALL-NEXT: [[C:%.*]] = zext i32 [[TMP1]] to i64 +; ALL-NEXT: [[D:%.*]] = or i64 [[C]], -32574 +; ALL-NEXT: ret i64 [[D]] ; %B = trunc i32 %A to i16 %C = or i16 %B, -32574 @@ -737,11 +737,11 @@ define i64 @test55(i32 %A) { } define i64 @test56(i16 %A) { -; CHECK-LABEL: @test56( -; CHECK-NEXT: [[P353:%.*]] = sext i16 [[A:%.*]] to i64 -; CHECK-NEXT: [[P354:%.*]] = lshr i64 [[P353]], 5 -; CHECK-NEXT: [[P355:%.*]] = and i64 [[P354]], 134217727 -; CHECK-NEXT: ret i64 [[P355]] +; ALL-LABEL: @test56( +; ALL-NEXT: [[P353:%.*]] = sext i16 [[A:%.*]] to i64 +; ALL-NEXT: [[P354:%.*]] = lshr i64 [[P353]], 5 +; ALL-NEXT: [[P355:%.*]] = and i64 [[P354]], 134217727 +; ALL-NEXT: ret i64 [[P355]] ; %p353 = sext i16 %A to i32 %p354 = lshr i32 %p353, 5 @@ -750,11 +750,11 @@ define i64 @test56(i16 %A) { } define <2 x i64> @test56vec(<2 x i16> %A) { -; CHECK-LABEL: @test56vec( -; CHECK-NEXT: [[P353:%.*]] = sext <2 x i16> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[P354:%.*]] = lshr <2 x i32> [[P353]], -; CHECK-NEXT: [[P355:%.*]] = zext <2 x i32> [[P354]] to <2 x i64> -; CHECK-NEXT: ret <2 x i64> [[P355]] +; ALL-LABEL: @test56vec( +; ALL-NEXT: [[P353:%.*]] = sext <2 x i16> [[A:%.*]] to <2 x i32> +; ALL-NEXT: [[P354:%.*]] = lshr <2 x i32> [[P353]], +; ALL-NEXT: [[P355:%.*]] = zext <2 x i32> [[P354]] to <2 x i64> +; ALL-NEXT: ret <2 x 
i64> [[P355]] ; %p353 = sext <2 x i16> %A to <2 x i32> %p354 = lshr <2 x i32> %p353, @@ -763,10 +763,10 @@ define <2 x i64> @test56vec(<2 x i16> %A) { } define i64 @test57(i64 %A) { -; CHECK-LABEL: @test57( -; CHECK-NEXT: [[C:%.*]] = lshr i64 [[A:%.*]], 8 -; CHECK-NEXT: [[E:%.*]] = and i64 [[C]], 16777215 -; CHECK-NEXT: ret i64 [[E]] +; ALL-LABEL: @test57( +; ALL-NEXT: [[C:%.*]] = lshr i64 [[A:%.*]], 8 +; ALL-NEXT: [[E:%.*]] = and i64 [[C]], 16777215 +; ALL-NEXT: ret i64 [[E]] ; %B = trunc i64 %A to i32 %C = lshr i32 %B, 8 @@ -775,11 +775,11 @@ define i64 @test57(i64 %A) { } define <2 x i64> @test57vec(<2 x i64> %A) { -; CHECK-LABEL: @test57vec( -; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], -; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[C]] to <2 x i64> -; CHECK-NEXT: ret <2 x i64> [[E]] +; ALL-LABEL: @test57vec( +; ALL-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> +; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; ALL-NEXT: [[E:%.*]] = zext <2 x i32> [[C]] to <2 x i64> +; ALL-NEXT: ret <2 x i64> [[E]] ; %B = trunc <2 x i64> %A to <2 x i32> %C = lshr <2 x i32> %B, @@ -788,11 +788,11 @@ define <2 x i64> @test57vec(<2 x i64> %A) { } define i64 @test58(i64 %A) { -; CHECK-LABEL: @test58( -; CHECK-NEXT: [[C:%.*]] = lshr i64 [[A:%.*]], 8 -; CHECK-NEXT: [[D:%.*]] = and i64 [[C]], 16777087 -; CHECK-NEXT: [[E:%.*]] = or i64 [[D]], 128 -; CHECK-NEXT: ret i64 [[E]] +; ALL-LABEL: @test58( +; ALL-NEXT: [[C:%.*]] = lshr i64 [[A:%.*]], 8 +; ALL-NEXT: [[D:%.*]] = and i64 [[C]], 16777087 +; ALL-NEXT: [[E:%.*]] = or i64 [[D]], 128 +; ALL-NEXT: ret i64 [[E]] ; %B = trunc i64 %A to i32 %C = lshr i32 %B, 8 @@ -803,14 +803,14 @@ define i64 @test58(i64 %A) { } define i64 @test59(i8 %A, i8 %B) { -; CHECK-LABEL: @test59( -; CHECK-NEXT: [[C:%.*]] = zext i8 [[A:%.*]] to i64 -; CHECK-NEXT: [[D:%.*]] = shl nuw nsw i64 [[C]], 4 -; CHECK-NEXT: [[E:%.*]] = and i64 [[D]], 48 -; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 [[B:%.*]], 
4 -; CHECK-NEXT: [[G:%.*]] = zext i8 [[TMP1]] to i64 -; CHECK-NEXT: [[H:%.*]] = or i64 [[E]], [[G]] -; CHECK-NEXT: ret i64 [[H]] +; ALL-LABEL: @test59( +; ALL-NEXT: [[C:%.*]] = zext i8 [[A:%.*]] to i64 +; ALL-NEXT: [[D:%.*]] = shl nuw nsw i64 [[C]], 4 +; ALL-NEXT: [[E:%.*]] = and i64 [[D]], 48 +; ALL-NEXT: [[TMP1:%.*]] = lshr i8 [[B:%.*]], 4 +; ALL-NEXT: [[G:%.*]] = zext i8 [[TMP1]] to i64 +; ALL-NEXT: [[H:%.*]] = or i64 [[E]], [[G]] +; ALL-NEXT: ret i64 [[H]] ; %C = zext i8 %A to i32 %D = shl i32 %C, 4 @@ -823,9 +823,9 @@ define i64 @test59(i8 %A, i8 %B) { } define <3 x i32> @test60(<4 x i32> %call4) { -; CHECK-LABEL: @test60( -; CHECK-NEXT: [[P10:%.*]] = shufflevector <4 x i32> [[CALL4:%.*]], <4 x i32> undef, <3 x i32> -; CHECK-NEXT: ret <3 x i32> [[P10]] +; ALL-LABEL: @test60( +; ALL-NEXT: [[P10:%.*]] = shufflevector <4 x i32> [[CALL4:%.*]], <4 x i32> undef, <3 x i32> +; ALL-NEXT: ret <3 x i32> [[P10]] ; %p11 = bitcast <4 x i32> %call4 to i128 %p9 = trunc i128 %p11 to i96 @@ -835,9 +835,9 @@ define <3 x i32> @test60(<4 x i32> %call4) { } define <4 x i32> @test61(<3 x i32> %call4) { -; CHECK-LABEL: @test61( -; CHECK-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[CALL4:%.*]], <3 x i32> , <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[P10]] +; ALL-LABEL: @test61( +; ALL-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[CALL4:%.*]], <3 x i32> , <4 x i32> +; ALL-NEXT: ret <4 x i32> [[P10]] ; %p11 = bitcast <3 x i32> %call4 to i96 %p9 = zext i96 %p11 to i128 @@ -846,10 +846,10 @@ define <4 x i32> @test61(<3 x i32> %call4) { } define <4 x i32> @test62(<3 x float> %call4) { -; CHECK-LABEL: @test62( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <3 x float> [[CALL4:%.*]] to <3 x i32> -; CHECK-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> , <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[P10]] +; ALL-LABEL: @test62( +; ALL-NEXT: [[TMP1:%.*]] = bitcast <3 x float> [[CALL4:%.*]] to <3 x i32> +; ALL-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> , <4 x i32> +; 
ALL-NEXT: ret <4 x i32> [[P10]] ; %p11 = bitcast <3 x float> %call4 to i96 %p9 = zext i96 %p11 to i128 @@ -859,10 +859,10 @@ define <4 x i32> @test62(<3 x float> %call4) { ; PR7311 - Don't create invalid IR on scalar->vector cast. define <2 x float> @test63(i64 %t8) { -; CHECK-LABEL: @test63( -; CHECK-NEXT: [[A:%.*]] = bitcast i64 [[T8:%.*]] to <2 x i32> -; CHECK-NEXT: [[VCVT_I:%.*]] = uitofp <2 x i32> [[A]] to <2 x float> -; CHECK-NEXT: ret <2 x float> [[VCVT_I]] +; ALL-LABEL: @test63( +; ALL-NEXT: [[A:%.*]] = bitcast i64 [[T8:%.*]] to <2 x i32> +; ALL-NEXT: [[VCVT_I:%.*]] = uitofp <2 x i32> [[A]] to <2 x float> +; ALL-NEXT: ret <2 x float> [[VCVT_I]] ; %a = bitcast i64 %t8 to <2 x i32> %vcvt.i = uitofp <2 x i32> %a to <2 x float> @@ -870,8 +870,8 @@ define <2 x float> @test63(i64 %t8) { } define <4 x float> @test64(<4 x float> %c) { -; CHECK-LABEL: @test64( -; CHECK-NEXT: ret <4 x float> [[C:%.*]] +; ALL-LABEL: @test64( +; ALL-NEXT: ret <4 x float> [[C:%.*]] ; %t0 = bitcast <4 x float> %c to <4 x i32> %t1 = bitcast <4 x i32> %t0 to <4 x float> @@ -879,8 +879,8 @@ define <4 x float> @test64(<4 x float> %c) { } define <4 x float> @test65(<4 x float> %c) { -; CHECK-LABEL: @test65( -; CHECK-NEXT: ret <4 x float> [[C:%.*]] +; ALL-LABEL: @test65( +; ALL-NEXT: ret <4 x float> [[C:%.*]] ; %t0 = bitcast <4 x float> %c to <2 x double> %t1 = bitcast <2 x double> %t0 to <4 x float> @@ -888,8 +888,8 @@ define <4 x float> @test65(<4 x float> %c) { } define <2 x float> @test66(<2 x float> %c) { -; CHECK-LABEL: @test66( -; CHECK-NEXT: ret <2 x float> [[C:%.*]] +; ALL-LABEL: @test66( +; ALL-NEXT: ret <2 x float> [[C:%.*]] ; %t0 = bitcast <2 x float> %c to double %t1 = bitcast double %t0 to <2 x float> @@ -897,16 +897,16 @@ define <2 x float> @test66(<2 x float> %c) { } define float @test2c() { -; CHECK-LABEL: @test2c( -; CHECK-NEXT: ret float -1.000000e+00 +; ALL-LABEL: @test2c( +; ALL-NEXT: ret float -1.000000e+00 ; ret float extractelement (<2 x float> bitcast (double bitcast 
(<2 x float> to double) to <2 x float>), i32 0) } define i64 @test_mmx(<2 x i32> %x) { -; CHECK-LABEL: @test_mmx( -; CHECK-NEXT: [[C:%.*]] = bitcast <2 x i32> [[X:%.*]] to i64 -; CHECK-NEXT: ret i64 [[C]] +; ALL-LABEL: @test_mmx( +; ALL-NEXT: [[C:%.*]] = bitcast <2 x i32> [[X:%.*]] to i64 +; ALL-NEXT: ret i64 [[C]] ; %A = bitcast <2 x i32> %x to x86_mmx %B = bitcast x86_mmx %A to <2 x i32> @@ -915,8 +915,8 @@ define i64 @test_mmx(<2 x i32> %x) { } define i64 @test_mmx_const(<2 x i32> %c) { -; CHECK-LABEL: @test_mmx_const( -; CHECK-NEXT: ret i64 0 +; ALL-LABEL: @test_mmx_const( +; ALL-NEXT: ret i64 0 ; %A = bitcast <2 x i32> zeroinitializer to x86_mmx %B = bitcast x86_mmx %A to <2 x i32> @@ -926,8 +926,8 @@ define i64 @test_mmx_const(<2 x i32> %c) { ; PR12514 define i1 @test67(i1 %a, i32 %b) { -; CHECK-LABEL: @test67( -; CHECK-NEXT: ret i1 false +; ALL-LABEL: @test67( +; ALL-NEXT: ret i1 false ; %t2 = zext i1 %a to i32 %conv6 = xor i32 %t2, 1 @@ -943,10 +943,10 @@ define i1 @test67(i1 %a, i32 %b) { %s = type { i32, i32, i16 } define %s @test68(%s *%p, i64 %i) { -; CHECK-LABEL: @test68( -; CHECK-NEXT: [[PP1:%.*]] = getelementptr [[S:%.*]], %s* [[P:%.*]], i64 [[I:%.*]] -; CHECK-NEXT: [[L:%.*]] = load [[S]], %s* [[PP1]], align 4 -; CHECK-NEXT: ret [[S]] %l +; ALL-LABEL: @test68( +; ALL-NEXT: [[PP1:%.*]] = getelementptr [[S:%.*]], %s* [[P:%.*]], i64 [[I:%.*]] +; ALL-NEXT: [[L:%.*]] = load [[S]], %s* [[PP1]], align 4 +; ALL-NEXT: ret [[S]] %l ; %o = mul i64 %i, 12 %q = bitcast %s* %p to i8* @@ -958,10 +958,10 @@ define %s @test68(%s *%p, i64 %i) { ; addrspacecasts should be eliminated. 
define %s @test68_addrspacecast(%s* %p, i64 %i) { -; CHECK-LABEL: @test68_addrspacecast( -; CHECK-NEXT: [[PP1:%.*]] = getelementptr [[S:%.*]], %s* [[P:%.*]], i64 [[I:%.*]] -; CHECK-NEXT: [[L:%.*]] = load [[S]], %s* [[PP1]], align 4 -; CHECK-NEXT: ret [[S]] %l +; ALL-LABEL: @test68_addrspacecast( +; ALL-NEXT: [[PP1:%.*]] = getelementptr [[S:%.*]], %s* [[P:%.*]], i64 [[I:%.*]] +; ALL-NEXT: [[L:%.*]] = load [[S]], %s* [[PP1]], align 4 +; ALL-NEXT: ret [[S]] %l ; %o = mul i64 %i, 12 %q = addrspacecast %s* %p to i8 addrspace(2)* @@ -972,11 +972,11 @@ define %s @test68_addrspacecast(%s* %p, i64 %i) { } define %s @test68_addrspacecast_2(%s* %p, i64 %i) { -; CHECK-LABEL: @test68_addrspacecast_2( -; CHECK-NEXT: [[PP1:%.*]] = getelementptr [[S:%.*]], %s* [[P:%.*]], i64 [[I:%.*]] -; CHECK-NEXT: [[R:%.*]] = addrspacecast %s* [[PP1]] to [[S]] addrspace(1)* -; CHECK-NEXT: [[L:%.*]] = load [[S]], [[S]] addrspace(1)* [[R]], align 4 -; CHECK-NEXT: ret [[S]] %l +; ALL-LABEL: @test68_addrspacecast_2( +; ALL-NEXT: [[PP1:%.*]] = getelementptr [[S:%.*]], %s* [[P:%.*]], i64 [[I:%.*]] +; ALL-NEXT: [[R:%.*]] = addrspacecast %s* [[PP1]] to [[S]] addrspace(1)* +; ALL-NEXT: [[L:%.*]] = load [[S]], [[S]] addrspace(1)* [[R]], align 4 +; ALL-NEXT: ret [[S]] %l ; %o = mul i64 %i, 12 %q = addrspacecast %s* %p to i8 addrspace(2)* @@ -987,10 +987,10 @@ define %s @test68_addrspacecast_2(%s* %p, i64 %i) { } define %s @test68_as1(%s addrspace(1)* %p, i32 %i) { -; CHECK-LABEL: @test68_as1( -; CHECK-NEXT: [[PP1:%.*]] = getelementptr [[S:%.*]], [[S]] addrspace(1)* [[P:%.*]], i32 [[I:%.*]] -; CHECK-NEXT: [[L:%.*]] = load [[S]], [[S]] addrspace(1)* [[PP1]], align 4 -; CHECK-NEXT: ret [[S]] %l +; ALL-LABEL: @test68_as1( +; ALL-NEXT: [[PP1:%.*]] = getelementptr [[S:%.*]], [[S]] addrspace(1)* [[P:%.*]], i32 [[I:%.*]] +; ALL-NEXT: [[L:%.*]] = load [[S]], [[S]] addrspace(1)* [[PP1]], align 4 +; ALL-NEXT: ret [[S]] %l ; %o = mul i32 %i, 12 %q = bitcast %s addrspace(1)* %p to i8 addrspace(1)* @@ -1001,10 +1001,10 
@@ define %s @test68_as1(%s addrspace(1)* %p, i32 %i) { } define double @test69(double *%p, i64 %i) { -; CHECK-LABEL: @test69( -; CHECK-NEXT: [[PP1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 [[I:%.*]] -; CHECK-NEXT: [[L:%.*]] = load double, double* [[PP1]], align 8 -; CHECK-NEXT: ret double [[L]] +; ALL-LABEL: @test69( +; ALL-NEXT: [[PP1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 [[I:%.*]] +; ALL-NEXT: [[L:%.*]] = load double, double* [[PP1]], align 8 +; ALL-NEXT: ret double [[L]] ; %o = shl nsw i64 %i, 3 %q = bitcast double* %p to i8* @@ -1015,11 +1015,11 @@ define double @test69(double *%p, i64 %i) { } define %s @test70(%s *%p, i64 %i) { -; CHECK-LABEL: @test70( -; CHECK-NEXT: [[O:%.*]] = mul nsw i64 [[I:%.*]], 3 -; CHECK-NEXT: [[PP1:%.*]] = getelementptr inbounds [[S:%.*]], %s* [[P:%.*]], i64 [[O]] -; CHECK-NEXT: [[L:%.*]] = load [[S]], %s* [[PP1]], align 4 -; CHECK-NEXT: ret [[S]] %l +; ALL-LABEL: @test70( +; ALL-NEXT: [[O:%.*]] = mul nsw i64 [[I:%.*]], 3 +; ALL-NEXT: [[PP1:%.*]] = getelementptr inbounds [[S:%.*]], %s* [[P:%.*]], i64 [[O]] +; ALL-NEXT: [[L:%.*]] = load [[S]], %s* [[PP1]], align 4 +; ALL-NEXT: ret [[S]] %l ; %o = mul nsw i64 %i, 36 %q = bitcast %s* %p to i8* @@ -1030,11 +1030,11 @@ define %s @test70(%s *%p, i64 %i) { } define double @test71(double *%p, i64 %i) { -; CHECK-LABEL: @test71( -; CHECK-NEXT: [[O:%.*]] = shl i64 [[I:%.*]], 2 -; CHECK-NEXT: [[PP1:%.*]] = getelementptr double, double* [[P:%.*]], i64 [[O]] -; CHECK-NEXT: [[L:%.*]] = load double, double* [[PP1]], align 8 -; CHECK-NEXT: ret double [[L]] +; ALL-LABEL: @test71( +; ALL-NEXT: [[O:%.*]] = shl i64 [[I:%.*]], 2 +; ALL-NEXT: [[PP1:%.*]] = getelementptr double, double* [[P:%.*]], i64 [[O]] +; ALL-NEXT: [[L:%.*]] = load double, double* [[PP1]], align 8 +; ALL-NEXT: ret double [[L]] ; %o = shl i64 %i, 5 %q = bitcast double* %p to i8* @@ -1045,11 +1045,11 @@ define double @test71(double *%p, i64 %i) { } define double @test72(double *%p, i32 %i) { -; 
CHECK-LABEL: @test72( -; CHECK-NEXT: [[O:%.*]] = sext i32 [[I:%.*]] to i64 -; CHECK-NEXT: [[PP1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 [[O]] -; CHECK-NEXT: [[L:%.*]] = load double, double* [[PP1]], align 8 -; CHECK-NEXT: ret double [[L]] +; ALL-LABEL: @test72( +; ALL-NEXT: [[O:%.*]] = sext i32 [[I:%.*]] to i64 +; ALL-NEXT: [[PP1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 [[O]] +; ALL-NEXT: [[L:%.*]] = load double, double* [[PP1]], align 8 +; ALL-NEXT: ret double [[L]] ; %so = shl nsw i32 %i, 3 %o = sext i32 %so to i64 @@ -1061,11 +1061,11 @@ define double @test72(double *%p, i32 %i) { } define double @test73(double *%p, i128 %i) { -; CHECK-LABEL: @test73( -; CHECK-NEXT: [[I_TR:%.*]] = trunc i128 [[I:%.*]] to i64 -; CHECK-NEXT: [[PP1:%.*]] = getelementptr double, double* [[P:%.*]], i64 [[I_TR]] -; CHECK-NEXT: [[L:%.*]] = load double, double* [[PP1]], align 8 -; CHECK-NEXT: ret double [[L]] +; ALL-LABEL: @test73( +; ALL-NEXT: [[I_TR:%.*]] = trunc i128 [[I:%.*]] to i64 +; ALL-NEXT: [[PP1:%.*]] = getelementptr double, double* [[P:%.*]], i64 [[I_TR]] +; ALL-NEXT: [[L:%.*]] = load double, double* [[PP1]], align 8 +; ALL-NEXT: ret double [[L]] ; %lo = shl nsw i128 %i, 3 %o = trunc i128 %lo to i64 @@ -1077,10 +1077,10 @@ define double @test73(double *%p, i128 %i) { } define double @test74(double *%p, i64 %i) { -; CHECK-LABEL: @test74( -; CHECK-NEXT: [[PP1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 [[I:%.*]] -; CHECK-NEXT: [[L:%.*]] = load double, double* [[PP1]], align 8 -; CHECK-NEXT: ret double [[L]] +; ALL-LABEL: @test74( +; ALL-NEXT: [[PP1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 [[I:%.*]] +; ALL-NEXT: [[L:%.*]] = load double, double* [[PP1]], align 8 +; ALL-NEXT: ret double [[L]] ; %q = bitcast double* %p to i64* %pp = getelementptr inbounds i64, i64* %q, i64 %i @@ -1090,13 +1090,13 @@ define double @test74(double *%p, i64 %i) { } define i32* @test75(i32* %p, i32 %x) { -; CHECK-LABEL: 
@test75( -; CHECK-NEXT: [[Y:%.*]] = shl i32 [[X:%.*]], 3 -; CHECK-NEXT: [[Z:%.*]] = sext i32 [[Y]] to i64 -; CHECK-NEXT: [[Q:%.*]] = bitcast i32* [[P:%.*]] to i8* -; CHECK-NEXT: [[R:%.*]] = getelementptr i8, i8* [[Q]], i64 [[Z]] -; CHECK-NEXT: [[S:%.*]] = bitcast i8* [[R]] to i32* -; CHECK-NEXT: ret i32* [[S]] +; ALL-LABEL: @test75( +; ALL-NEXT: [[Y:%.*]] = shl i32 [[X:%.*]], 3 +; ALL-NEXT: [[Z:%.*]] = sext i32 [[Y]] to i64 +; ALL-NEXT: [[Q:%.*]] = bitcast i32* [[P:%.*]] to i8* +; ALL-NEXT: [[R:%.*]] = getelementptr i8, i8* [[Q]], i64 [[Z]] +; ALL-NEXT: [[S:%.*]] = bitcast i8* [[R]] to i32* +; ALL-NEXT: ret i32* [[S]] ; %y = shl i32 %x, 3 %z = sext i32 %y to i64 @@ -1107,11 +1107,11 @@ define i32* @test75(i32* %p, i32 %x) { } define %s @test76(%s *%p, i64 %i, i64 %j) { -; CHECK-LABEL: @test76( -; CHECK-NEXT: [[O2:%.*]] = mul i64 [[I:%.*]], [[J:%.*]] -; CHECK-NEXT: [[PP1:%.*]] = getelementptr [[S:%.*]], %s* [[P:%.*]], i64 [[O2]] -; CHECK-NEXT: [[L:%.*]] = load [[S]], %s* [[PP1]], align 4 -; CHECK-NEXT: ret [[S]] %l +; ALL-LABEL: @test76( +; ALL-NEXT: [[O2:%.*]] = mul i64 [[I:%.*]], [[J:%.*]] +; ALL-NEXT: [[PP1:%.*]] = getelementptr [[S:%.*]], %s* [[P:%.*]], i64 [[O2]] +; ALL-NEXT: [[L:%.*]] = load [[S]], %s* [[PP1]], align 4 +; ALL-NEXT: ret [[S]] %l ; %o = mul i64 %i, 12 %o2 = mul nsw i64 %o, %j @@ -1123,12 +1123,12 @@ define %s @test76(%s *%p, i64 %i, i64 %j) { } define %s @test77(%s *%p, i64 %i, i64 %j) { -; CHECK-LABEL: @test77( -; CHECK-NEXT: [[O:%.*]] = mul nsw i64 [[I:%.*]], 3 -; CHECK-NEXT: [[O2:%.*]] = mul nsw i64 [[O]], [[J:%.*]] -; CHECK-NEXT: [[PP1:%.*]] = getelementptr inbounds [[S:%.*]], %s* [[P:%.*]], i64 [[O2]] -; CHECK-NEXT: [[L:%.*]] = load [[S]], %s* [[PP1]], align 4 -; CHECK-NEXT: ret [[S]] %l +; ALL-LABEL: @test77( +; ALL-NEXT: [[O:%.*]] = mul nsw i64 [[I:%.*]], 3 +; ALL-NEXT: [[O2:%.*]] = mul nsw i64 [[O]], [[J:%.*]] +; ALL-NEXT: [[PP1:%.*]] = getelementptr inbounds [[S:%.*]], %s* [[P:%.*]], i64 [[O2]] +; ALL-NEXT: [[L:%.*]] = load [[S]], %s* 
[[PP1]], align 4 +; ALL-NEXT: ret [[S]] %l ; %o = mul nsw i64 %i, 36 %o2 = mul nsw i64 %o, %j @@ -1140,18 +1140,18 @@ define %s @test77(%s *%p, i64 %i, i64 %j) { } define %s @test78(%s *%p, i64 %i, i64 %j, i32 %k, i32 %l, i128 %m, i128 %n) { -; CHECK-LABEL: @test78( -; CHECK-NEXT: [[A:%.*]] = mul nsw i32 [[K:%.*]], 3 -; CHECK-NEXT: [[B:%.*]] = mul nsw i32 [[A]], [[L:%.*]] -; CHECK-NEXT: [[C:%.*]] = sext i32 [[B]] to i128 -; CHECK-NEXT: [[D:%.*]] = mul nsw i128 [[C]], [[M:%.*]] -; CHECK-NEXT: [[E:%.*]] = mul i128 [[D]], [[N:%.*]] -; CHECK-NEXT: [[F:%.*]] = trunc i128 [[E]] to i64 -; CHECK-NEXT: [[G:%.*]] = mul i64 [[F]], [[I:%.*]] -; CHECK-NEXT: [[H:%.*]] = mul i64 [[G]], [[J:%.*]] -; CHECK-NEXT: [[PP1:%.*]] = getelementptr [[S:%.*]], %s* [[P:%.*]], i64 [[H]] -; CHECK-NEXT: [[LOAD:%.*]] = load [[S]], %s* [[PP1]], align 4 -; CHECK-NEXT: ret [[S]] %load +; ALL-LABEL: @test78( +; ALL-NEXT: [[A:%.*]] = mul nsw i32 [[K:%.*]], 3 +; ALL-NEXT: [[B:%.*]] = mul nsw i32 [[A]], [[L:%.*]] +; ALL-NEXT: [[C:%.*]] = sext i32 [[B]] to i128 +; ALL-NEXT: [[D:%.*]] = mul nsw i128 [[C]], [[M:%.*]] +; ALL-NEXT: [[E:%.*]] = mul i128 [[D]], [[N:%.*]] +; ALL-NEXT: [[F:%.*]] = trunc i128 [[E]] to i64 +; ALL-NEXT: [[G:%.*]] = mul i64 [[F]], [[I:%.*]] +; ALL-NEXT: [[H:%.*]] = mul i64 [[G]], [[J:%.*]] +; ALL-NEXT: [[PP1:%.*]] = getelementptr [[S:%.*]], %s* [[P:%.*]], i64 [[H]] +; ALL-NEXT: [[LOAD:%.*]] = load [[S]], %s* [[PP1]], align 4 +; ALL-NEXT: ret [[S]] %load ; %a = mul nsw i32 %k, 36 %b = mul nsw i32 %a, %l @@ -1169,16 +1169,16 @@ define %s @test78(%s *%p, i64 %i, i64 %j, i32 %k, i32 %l, i128 %m, i128 %n) { } define %s @test79(%s *%p, i64 %i, i32 %j) { -; CHECK-LABEL: @test79( -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[I:%.*]] to i32 -; CHECK-NEXT: [[B:%.*]] = mul i32 [[TMP1]], 36 -; CHECK-NEXT: [[C:%.*]] = mul i32 [[B]], [[J:%.*]] -; CHECK-NEXT: [[Q:%.*]] = bitcast %s* [[P:%.*]] to i8* -; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[C]] to i64 -; CHECK-NEXT: [[PP:%.*]] = getelementptr inbounds 
i8, i8* [[Q]], i64 [[TMP2]] -; CHECK-NEXT: [[R:%.*]] = bitcast i8* [[PP]] to %s* -; CHECK-NEXT: [[L:%.*]] = load [[S:%.*]], %s* [[R]], align 4 -; CHECK-NEXT: ret [[S]] %l +; ALL-LABEL: @test79( +; ALL-NEXT: [[TMP1:%.*]] = trunc i64 [[I:%.*]] to i32 +; ALL-NEXT: [[B:%.*]] = mul i32 [[TMP1]], 36 +; ALL-NEXT: [[C:%.*]] = mul i32 [[B]], [[J:%.*]] +; ALL-NEXT: [[Q:%.*]] = bitcast %s* [[P:%.*]] to i8* +; ALL-NEXT: [[TMP2:%.*]] = sext i32 [[C]] to i64 +; ALL-NEXT: [[PP:%.*]] = getelementptr inbounds i8, i8* [[Q]], i64 [[TMP2]] +; ALL-NEXT: [[R:%.*]] = bitcast i8* [[PP]] to %s* +; ALL-NEXT: [[L:%.*]] = load [[S:%.*]], %s* [[R]], align 4 +; ALL-NEXT: ret [[S]] %l ; %a = mul nsw i64 %i, 36 %b = trunc i64 %a to i32 @@ -1191,11 +1191,11 @@ define %s @test79(%s *%p, i64 %i, i32 %j) { } define double @test80([100 x double]* %p, i32 %i) { -; CHECK-LABEL: @test80( -; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[I:%.*]] to i64 -; CHECK-NEXT: [[PP1:%.*]] = getelementptr [100 x double], [100 x double]* [[P:%.*]], i64 0, i64 [[TMP1]] -; CHECK-NEXT: [[L:%.*]] = load double, double* [[PP1]], align 8 -; CHECK-NEXT: ret double [[L]] +; ALL-LABEL: @test80( +; ALL-NEXT: [[TMP1:%.*]] = sext i32 [[I:%.*]] to i64 +; ALL-NEXT: [[PP1:%.*]] = getelementptr [100 x double], [100 x double]* [[P:%.*]], i64 0, i64 [[TMP1]] +; ALL-NEXT: [[L:%.*]] = load double, double* [[PP1]], align 8 +; ALL-NEXT: ret double [[L]] ; %t = shl nsw i32 %i, 3 %q = bitcast [100 x double]* %p to i8* @@ -1206,10 +1206,10 @@ define double @test80([100 x double]* %p, i32 %i) { } define double @test80_addrspacecast([100 x double] addrspace(1)* %p, i32 %i) { -; CHECK-LABEL: @test80_addrspacecast( -; CHECK-NEXT: [[PP1:%.*]] = getelementptr [100 x double], [100 x double] addrspace(1)* [[P:%.*]], i32 0, i32 [[I:%.*]] -; CHECK-NEXT: [[L:%.*]] = load double, double addrspace(1)* [[PP1]], align 8 -; CHECK-NEXT: ret double [[L]] +; ALL-LABEL: @test80_addrspacecast( +; ALL-NEXT: [[PP1:%.*]] = getelementptr [100 x double], [100 x double] 
addrspace(1)* [[P:%.*]], i32 0, i32 [[I:%.*]] +; ALL-NEXT: [[L:%.*]] = load double, double addrspace(1)* [[PP1]], align 8 +; ALL-NEXT: ret double [[L]] ; %t = shl nsw i32 %i, 3 %q = addrspacecast [100 x double] addrspace(1)* %p to i8 addrspace(2)* @@ -1220,11 +1220,11 @@ define double @test80_addrspacecast([100 x double] addrspace(1)* %p, i32 %i) { } define double @test80_addrspacecast_2([100 x double] addrspace(1)* %p, i32 %i) { -; CHECK-LABEL: @test80_addrspacecast_2( -; CHECK-NEXT: [[PP1:%.*]] = getelementptr [100 x double], [100 x double] addrspace(1)* [[P:%.*]], i32 0, i32 [[I:%.*]] -; CHECK-NEXT: [[R:%.*]] = addrspacecast double addrspace(1)* [[PP1]] to double addrspace(3)* -; CHECK-NEXT: [[L:%.*]] = load double, double addrspace(3)* [[R]], align 8 -; CHECK-NEXT: ret double [[L]] +; ALL-LABEL: @test80_addrspacecast_2( +; ALL-NEXT: [[PP1:%.*]] = getelementptr [100 x double], [100 x double] addrspace(1)* [[P:%.*]], i32 0, i32 [[I:%.*]] +; ALL-NEXT: [[R:%.*]] = addrspacecast double addrspace(1)* [[PP1]] to double addrspace(3)* +; ALL-NEXT: [[L:%.*]] = load double, double addrspace(3)* [[R]], align 8 +; ALL-NEXT: ret double [[L]] ; %t = shl nsw i32 %i, 3 %q = addrspacecast [100 x double] addrspace(1)* %p to i8 addrspace(2)* @@ -1235,11 +1235,11 @@ define double @test80_addrspacecast_2([100 x double] addrspace(1)* %p, i32 %i) { } define double @test80_as1([100 x double] addrspace(1)* %p, i16 %i) { -; CHECK-LABEL: @test80_as1( -; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[I:%.*]] to i32 -; CHECK-NEXT: [[PP1:%.*]] = getelementptr [100 x double], [100 x double] addrspace(1)* [[P:%.*]], i32 0, i32 [[TMP1]] -; CHECK-NEXT: [[L:%.*]] = load double, double addrspace(1)* [[PP1]], align 8 -; CHECK-NEXT: ret double [[L]] +; ALL-LABEL: @test80_as1( +; ALL-NEXT: [[TMP1:%.*]] = sext i16 [[I:%.*]] to i32 +; ALL-NEXT: [[PP1:%.*]] = getelementptr [100 x double], [100 x double] addrspace(1)* [[P:%.*]], i32 0, i32 [[TMP1]] +; ALL-NEXT: [[L:%.*]] = load double, double addrspace(1)* 
[[PP1]], align 8 +; ALL-NEXT: ret double [[L]] ; %t = shl nsw i16 %i, 3 %q = bitcast [100 x double] addrspace(1)* %p to i8 addrspace(1)* @@ -1250,13 +1250,13 @@ define double @test80_as1([100 x double] addrspace(1)* %p, i16 %i) { } define double @test81(double *%p, float %f) { -; CHECK-LABEL: @test81( -; CHECK-NEXT: [[I:%.*]] = fptosi float [[F:%.*]] to i64 -; CHECK-NEXT: [[Q:%.*]] = bitcast double* [[P:%.*]] to i8* -; CHECK-NEXT: [[PP:%.*]] = getelementptr i8, i8* [[Q]], i64 [[I]] -; CHECK-NEXT: [[R:%.*]] = bitcast i8* [[PP]] to double* -; CHECK-NEXT: [[L:%.*]] = load double, double* [[R]], align 8 -; CHECK-NEXT: ret double [[L]] +; ALL-LABEL: @test81( +; ALL-NEXT: [[I:%.*]] = fptosi float [[F:%.*]] to i64 +; ALL-NEXT: [[Q:%.*]] = bitcast double* [[P:%.*]] to i8* +; ALL-NEXT: [[PP:%.*]] = getelementptr i8, i8* [[Q]], i64 [[I]] +; ALL-NEXT: [[R:%.*]] = bitcast i8* [[PP]] to double* +; ALL-NEXT: [[L:%.*]] = load double, double* [[R]], align 8 +; ALL-NEXT: ret double [[L]] ; %i = fptosi float %f to i64 %q = bitcast double* %p to i8* @@ -1267,10 +1267,10 @@ define double @test81(double *%p, float %f) { } define i64 @test82(i64 %A) { -; CHECK-LABEL: @test82( -; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[A:%.*]], 1 -; CHECK-NEXT: [[E:%.*]] = and i64 [[TMP1]], 4294966784 -; CHECK-NEXT: ret i64 [[E]] +; ALL-LABEL: @test82( +; ALL-NEXT: [[TMP1:%.*]] = shl i64 [[A:%.*]], 1 +; ALL-NEXT: [[E:%.*]] = and i64 [[TMP1]], 4294966784 +; ALL-NEXT: ret i64 [[E]] ; %B = trunc i64 %A to i32 %C = lshr i32 %B, 8 @@ -1281,13 +1281,13 @@ define i64 @test82(i64 %A) { ; PR15959 define i64 @test83(i16 %a, i64 %k) { -; CHECK-LABEL: @test83( -; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[A:%.*]] to i32 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[K:%.*]] to i32 -; CHECK-NEXT: [[SH_PROM:%.*]] = add i32 [[TMP1]], -1 -; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[CONV]], [[SH_PROM]] -; CHECK-NEXT: [[SH_PROM1:%.*]] = zext i32 [[SHL]] to i64 -; CHECK-NEXT: ret i64 [[SH_PROM1]] +; ALL-LABEL: @test83( +; ALL-NEXT: 
[[CONV:%.*]] = sext i16 [[A:%.*]] to i32 +; ALL-NEXT: [[TMP1:%.*]] = trunc i64 [[K:%.*]] to i32 +; ALL-NEXT: [[SH_PROM:%.*]] = add i32 [[TMP1]], -1 +; ALL-NEXT: [[SHL:%.*]] = shl i32 [[CONV]], [[SH_PROM]] +; ALL-NEXT: [[SH_PROM1:%.*]] = zext i32 [[SHL]] to i64 +; ALL-NEXT: ret i64 [[SH_PROM1]] ; %conv = sext i16 %a to i32 %sub = add nsw i64 %k, -1 @@ -1298,11 +1298,11 @@ define i64 @test83(i16 %a, i64 %k) { } define i8 @test84(i32 %a) { -; CHECK-LABEL: @test84( -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], 2130706432 -; CHECK-NEXT: [[SHR:%.*]] = lshr exact i32 [[ADD]], 23 -; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SHR]] to i8 -; CHECK-NEXT: ret i8 [[TRUNC]] +; ALL-LABEL: @test84( +; ALL-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], 2130706432 +; ALL-NEXT: [[SHR:%.*]] = lshr exact i32 [[ADD]], 23 +; ALL-NEXT: [[TRUNC:%.*]] = trunc i32 [[SHR]] to i8 +; ALL-NEXT: ret i8 [[TRUNC]] ; %add = add nsw i32 %a, -16777216 %shr = lshr exact i32 %add, 23 @@ -1311,11 +1311,11 @@ define i8 @test84(i32 %a) { } define i8 @test85(i32 %a) { -; CHECK-LABEL: @test85( -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], 2130706432 -; CHECK-NEXT: [[SHR:%.*]] = lshr exact i32 [[ADD]], 23 -; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SHR]] to i8 -; CHECK-NEXT: ret i8 [[TRUNC]] +; ALL-LABEL: @test85( +; ALL-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], 2130706432 +; ALL-NEXT: [[SHR:%.*]] = lshr exact i32 [[ADD]], 23 +; ALL-NEXT: [[TRUNC:%.*]] = trunc i32 [[SHR]] to i8 +; ALL-NEXT: ret i8 [[TRUNC]] ; %add = add nuw i32 %a, -16777216 %shr = lshr exact i32 %add, 23 @@ -1324,9 +1324,9 @@ define i8 @test85(i32 %a) { } define i16 @test86(i16 %v) { -; CHECK-LABEL: @test86( -; CHECK-NEXT: [[TMP1:%.*]] = ashr i16 [[V:%.*]], 4 -; CHECK-NEXT: ret i16 [[TMP1]] +; ALL-LABEL: @test86( +; ALL-NEXT: [[TMP1:%.*]] = ashr i16 [[V:%.*]], 4 +; ALL-NEXT: ret i16 [[TMP1]] ; %a = sext i16 %v to i32 %s = ashr i32 %a, 4 @@ -1335,9 +1335,9 @@ define i16 @test86(i16 %v) { } define i16 @test87(i16 %v) { -; CHECK-LABEL: @test87( -; 
CHECK-NEXT: [[TMP1:%.*]] = ashr i16 [[V:%.*]], 12 -; CHECK-NEXT: ret i16 [[TMP1]] +; ALL-LABEL: @test87( +; ALL-NEXT: [[TMP1:%.*]] = ashr i16 [[V:%.*]], 12 +; ALL-NEXT: ret i16 [[TMP1]] ; %c = sext i16 %v to i32 %m = mul nsw i32 %c, 16 @@ -1347,9 +1347,9 @@ define i16 @test87(i16 %v) { } define i16 @test88(i16 %v) { -; CHECK-LABEL: @test88( -; CHECK-NEXT: [[TMP1:%.*]] = ashr i16 [[V:%.*]], 15 -; CHECK-NEXT: ret i16 [[TMP1]] +; ALL-LABEL: @test88( +; ALL-NEXT: [[TMP1:%.*]] = ashr i16 [[V:%.*]], 15 +; ALL-NEXT: ret i16 [[TMP1]] ; %a = sext i16 %v to i32 %s = ashr i32 %a, 18 @@ -1358,10 +1358,10 @@ define i16 @test88(i16 %v) { } define i32 @PR21388(i32* %v) { -; CHECK-LABEL: @PR21388( -; CHECK-NEXT: [[ICMP:%.*]] = icmp slt i32* [[V:%.*]], null -; CHECK-NEXT: [[SEXT:%.*]] = sext i1 [[ICMP]] to i32 -; CHECK-NEXT: ret i32 [[SEXT]] +; ALL-LABEL: @PR21388( +; ALL-NEXT: [[ICMP:%.*]] = icmp slt i32* [[V:%.*]], null +; ALL-NEXT: [[SEXT:%.*]] = sext i1 [[ICMP]] to i32 +; ALL-NEXT: ret i32 [[SEXT]] ; %icmp = icmp slt i32* %v, null %sext = sext i1 %icmp to i32 @@ -1369,9 +1369,9 @@ define i32 @PR21388(i32* %v) { } define float @sitofp_zext(i16 %a) { -; CHECK-LABEL: @sitofp_zext( -; CHECK-NEXT: [[SITOFP:%.*]] = uitofp i16 [[A:%.*]] to float -; CHECK-NEXT: ret float [[SITOFP]] +; ALL-LABEL: @sitofp_zext( +; ALL-NEXT: [[SITOFP:%.*]] = uitofp i16 [[A:%.*]] to float +; ALL-NEXT: ret float [[SITOFP]] ; %zext = zext i16 %a to i32 %sitofp = sitofp i32 %zext to float @@ -1379,11 +1379,11 @@ define float @sitofp_zext(i16 %a) { } define i1 @PR23309(i32 %A, i32 %B) { -; CHECK-LABEL: @PR23309( -; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[SUB]], 1 -; CHECK-NEXT: [[TRUNC:%.*]] = icmp ne i32 [[TMP1]], 0 -; CHECK-NEXT: ret i1 [[TRUNC]] +; ALL-LABEL: @PR23309( +; ALL-NEXT: [[SUB:%.*]] = sub i32 [[A:%.*]], [[B:%.*]] +; ALL-NEXT: [[TMP1:%.*]] = and i32 [[SUB]], 1 +; ALL-NEXT: [[TRUNC:%.*]] = icmp ne i32 [[TMP1]], 0 +; ALL-NEXT: ret i1 [[TRUNC]] ; 
%add = add i32 %A, -4 %sub = sub nsw i32 %add, %B @@ -1392,11 +1392,11 @@ define i1 @PR23309(i32 %A, i32 %B) { } define i1 @PR23309v2(i32 %A, i32 %B) { -; CHECK-LABEL: @PR23309v2( -; CHECK-NEXT: [[SUB:%.*]] = add i32 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[SUB]], 1 -; CHECK-NEXT: [[TRUNC:%.*]] = icmp ne i32 [[TMP1]], 0 -; CHECK-NEXT: ret i1 [[TRUNC]] +; ALL-LABEL: @PR23309v2( +; ALL-NEXT: [[SUB:%.*]] = add i32 [[A:%.*]], [[B:%.*]] +; ALL-NEXT: [[TMP1:%.*]] = and i32 [[SUB]], 1 +; ALL-NEXT: [[TRUNC:%.*]] = icmp ne i32 [[TMP1]], 0 +; ALL-NEXT: ret i1 [[TRUNC]] ; %add = add i32 %A, -4 %sub = add nuw i32 %add, %B @@ -1405,10 +1405,10 @@ define i1 @PR23309v2(i32 %A, i32 %B) { } define i16 @PR24763(i8 %V) { -; CHECK-LABEL: @PR24763( -; CHECK-NEXT: [[L:%.*]] = ashr i8 [[V:%.*]], 1 -; CHECK-NEXT: [[T:%.*]] = sext i8 [[L]] to i16 -; CHECK-NEXT: ret i16 [[T]] +; ALL-LABEL: @PR24763( +; ALL-NEXT: [[L:%.*]] = ashr i8 [[V:%.*]], 1 +; ALL-NEXT: [[T:%.*]] = sext i8 [[L]] to i16 +; ALL-NEXT: ret i16 [[T]] ; %conv = sext i8 %V to i32 %l = lshr i32 %conv, 1 @@ -1417,23 +1417,32 @@ define i16 @PR24763(i8 %V) { } define i64 @PR28745() { -; CHECK-LABEL: @PR28745( -; CHECK-NEXT: ret i64 1 +; BE-LABEL: @PR28745( +; BE-NEXT: ret i64 1 +; +; LE-LABEL: @PR28745( +; LE-NEXT: ret i64 0 ; %b = zext i32 extractvalue ({ i32 } select (i1 icmp eq (i16 extractelement (<2 x i16> bitcast (<1 x i32> to <2 x i16>), i32 0), i16 0), { i32 } { i32 1 }, { i32 } zeroinitializer), 0) to i64 ret i64 %b } define i32 @test89() { -; CHECK-LABEL: @test89( -; CHECK-NEXT: ret i32 393216 +; BE-LABEL: @test89( +; BE-NEXT: ret i32 393216 +; +; LE-LABEL: @test89( +; LE-NEXT: ret i32 6 ; ret i32 bitcast (<2 x i16> to i32) } define <2 x i32> @test90() { -; CHECK-LABEL: @test90( -; CHECK-NEXT: ret <2 x i32> +; BE-LABEL: @test90( +; BE-NEXT: ret <2 x i32> +; +; LE-LABEL: @test90( +; LE-NEXT: ret <2 x i32> ; %t6 = bitcast <4 x half> to <2 x i32> ret <2 x i32> %t6 @@ -1441,11 +1450,11 @@ define <2 x i32> 
@test90() { ; Do not optimize to ashr i64 (shift by 48 > 96 - 64) define i64 @test91(i64 %A) { -; CHECK-LABEL: @test91( -; CHECK-NEXT: [[B:%.*]] = sext i64 [[A:%.*]] to i96 -; CHECK-NEXT: [[C:%.*]] = lshr i96 [[B]], 48 -; CHECK-NEXT: [[D:%.*]] = trunc i96 [[C]] to i64 -; CHECK-NEXT: ret i64 [[D]] +; ALL-LABEL: @test91( +; ALL-NEXT: [[B:%.*]] = sext i64 [[A:%.*]] to i96 +; ALL-NEXT: [[C:%.*]] = lshr i96 [[B]], 48 +; ALL-NEXT: [[D:%.*]] = trunc i96 [[C]] to i64 +; ALL-NEXT: ret i64 [[D]] ; %B = sext i64 %A to i96 %C = lshr i96 %B, 48 @@ -1455,9 +1464,9 @@ define i64 @test91(i64 %A) { ; Do optimize to ashr i64 (shift by 32 <= 96 - 64) define i64 @test92(i64 %A) { -; CHECK-LABEL: @test92( -; CHECK-NEXT: [[TMP1:%.*]] = ashr i64 [[A:%.*]], 32 -; CHECK-NEXT: ret i64 [[TMP1]] +; ALL-LABEL: @test92( +; ALL-NEXT: [[TMP1:%.*]] = ashr i64 [[A:%.*]], 32 +; ALL-NEXT: ret i64 [[TMP1]] ; %B = sext i64 %A to i96 %C = lshr i96 %B, 32 @@ -1467,9 +1476,9 @@ define i64 @test92(i64 %A) { ; When optimizing to ashr i32, don't shift by more than 31. 
define i32 @test93(i32 %A) { -; CHECK-LABEL: @test93( -; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31 -; CHECK-NEXT: ret i32 [[TMP1]] +; ALL-LABEL: @test93( +; ALL-NEXT: [[TMP1:%.*]] = ashr i32 [[A:%.*]], 31 +; ALL-NEXT: ret i32 [[TMP1]] ; %B = sext i32 %A to i96 %C = lshr i96 %B, 64 @@ -1481,9 +1490,9 @@ define i32 @test93(i32 %A) { ; PR33078 define i8 @pr33078_1(i8 %A) { -; CHECK-LABEL: @pr33078_1( -; CHECK-NEXT: [[TMP1:%.*]] = ashr i8 [[A:%.*]], 7 -; CHECK-NEXT: ret i8 [[TMP1]] +; ALL-LABEL: @pr33078_1( +; ALL-NEXT: [[TMP1:%.*]] = ashr i8 [[A:%.*]], 7 +; ALL-NEXT: ret i8 [[TMP1]] ; %B = sext i8 %A to i16 %C = lshr i16 %B, 8 @@ -1492,10 +1501,10 @@ define i8 @pr33078_1(i8 %A) { } define i12 @pr33078_2(i8 %A) { -; CHECK-LABEL: @pr33078_2( -; CHECK-NEXT: [[C:%.*]] = ashr i8 [[A:%.*]], 4 -; CHECK-NEXT: [[D:%.*]] = sext i8 [[C]] to i12 -; CHECK-NEXT: ret i12 [[D]] +; ALL-LABEL: @pr33078_2( +; ALL-NEXT: [[C:%.*]] = ashr i8 [[A:%.*]], 4 +; ALL-NEXT: [[D:%.*]] = sext i8 [[C]] to i12 +; ALL-NEXT: ret i12 [[D]] ; %B = sext i8 %A to i16 %C = lshr i16 %B, 4 @@ -1504,11 +1513,11 @@ define i12 @pr33078_2(i8 %A) { } define i4 @pr33078_3(i8 %A) { -; CHECK-LABEL: @pr33078_3( -; CHECK-NEXT: [[B:%.*]] = sext i8 [[A:%.*]] to i16 -; CHECK-NEXT: [[C:%.*]] = lshr i16 [[B]], 12 -; CHECK-NEXT: [[D:%.*]] = trunc i16 [[C]] to i4 -; CHECK-NEXT: ret i4 [[D]] +; ALL-LABEL: @pr33078_3( +; ALL-NEXT: [[B:%.*]] = sext i8 [[A:%.*]] to i16 +; ALL-NEXT: [[C:%.*]] = lshr i16 [[B]], 12 +; ALL-NEXT: [[D:%.*]] = trunc i16 [[C]] to i4 +; ALL-NEXT: ret i4 [[D]] ; %B = sext i8 %A to i16 %C = lshr i16 %B, 12 @@ -1518,11 +1527,11 @@ define i4 @pr33078_3(i8 %A) { define i8 @pr33078_4(i3 %x) { ; Don't turn this in an `ashr`. 
This was getting miscompiled -; CHECK-LABEL: @pr33078_4( -; CHECK-NEXT: [[B:%.*]] = sext i3 [[X:%.*]] to i16 -; CHECK-NEXT: [[C:%.*]] = lshr i16 [[B]], 13 -; CHECK-NEXT: [[D:%.*]] = trunc i16 [[C]] to i8 -; CHECK-NEXT: ret i8 [[D]] +; ALL-LABEL: @pr33078_4( +; ALL-NEXT: [[B:%.*]] = sext i3 [[X:%.*]] to i16 +; ALL-NEXT: [[C:%.*]] = lshr i16 [[B]], 13 +; ALL-NEXT: [[D:%.*]] = trunc i16 [[C]] to i8 +; ALL-NEXT: ret i8 [[D]] ; %B = sext i3 %x to i16 %C = lshr i16 %B, 13 @@ -1532,10 +1541,10 @@ define i8 @pr33078_4(i3 %x) { ; (sext (xor (cmp), -1)) -> (sext (!cmp)) define i64 @test94(i32 %a) { -; CHECK-LABEL: @test94( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[A:%.*]], -2 -; CHECK-NEXT: [[TMP2:%.*]] = sext i1 [[TMP1]] to i64 -; CHECK-NEXT: ret i64 [[TMP2]] +; ALL-LABEL: @test94( +; ALL-NEXT: [[TMP1:%.*]] = icmp ne i32 [[A:%.*]], -2 +; ALL-NEXT: [[TMP2:%.*]] = sext i1 [[TMP1]] to i64 +; ALL-NEXT: ret i64 [[TMP2]] ; %1 = icmp eq i32 %a, -2 %2 = sext i1 %1 to i8 @@ -1546,11 +1555,11 @@ define i64 @test94(i32 %a) { ; We should be able to remove the zext and trunc here. define i32 @test95(i32 %x) { -; CHECK-LABEL: @test95( -; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 6 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], 40 -; CHECK-NEXT: ret i32 [[TMP3]] +; ALL-LABEL: @test95( +; ALL-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 6 +; ALL-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2 +; ALL-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], 40 +; ALL-NEXT: ret i32 [[TMP3]] ; %1 = trunc i32 %x to i8 %2 = lshr i8 %1, 6 From bc7f1df6b61a3c8f88f2541ef9ba73f4ee0ee4fe Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Fri, 29 Nov 2019 13:02:41 +0100 Subject: [PATCH 224/591] [lldb][NFC] Explicitly ask for a ClangASTContext in ClangASTSource ClangASTSource currently takes a clang::ASTContext and keeps that around, but a lot of LLDB's functionality for doing operations on a clang::ASTContext is in its ClangASTContext twin class. 
We currently constantly recompute the respective ClangASTContext from the clang::ASTContext while we instead could just pass and store a ClangASTContext in the ClangASTSource. This also allows us to get rid of a bunch of unreachable error checking for cases where recomputation fails for some reason. --- .../ExpressionParser/Clang/ClangASTSource.cpp | 21 +++++++-------- .../ExpressionParser/Clang/ClangASTSource.h | 4 ++- .../Clang/ClangExpressionDeclMap.cpp | 27 +++++++------------ .../Clang/ClangExpressionParser.cpp | 2 +- lldb/source/Symbol/ClangASTContext.cpp | 2 +- 5 files changed, 25 insertions(+), 31 deletions(-) diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp index 2b484db3a188e..51540902e2dcc 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp @@ -57,10 +57,11 @@ ClangASTSource::ClangASTSource(const lldb::TargetSP &target) } } -void ClangASTSource::InstallASTContext(clang::ASTContext &ast_context, +void ClangASTSource::InstallASTContext(ClangASTContext &clang_ast_context, clang::FileManager &file_manager, bool is_shared_context) { - m_ast_context = &ast_context; + m_ast_context = clang_ast_context.getASTContext(); + m_clang_ast_context = &clang_ast_context; m_file_manager = &file_manager; if (m_target->GetUseModernTypeLookup()) { // Configure the ExternalASTMerger. The merger needs to be able to import @@ -69,7 +70,7 @@ void ClangASTSource::InstallASTContext(clang::ASTContext &ast_context, // AST contexts. 
lldbassert(!m_merger_up); - clang::ExternalASTMerger::ImporterTarget target = {ast_context, + clang::ExternalASTMerger::ImporterTarget target = {*m_ast_context, file_manager}; std::vector sources; for (lldb::ModuleSP module_sp : m_target->GetImages().Modules()) { @@ -132,7 +133,7 @@ void ClangASTSource::InstallASTContext(clang::ASTContext &ast_context, m_merger_up = std::make_unique(target, sources); } else { - m_ast_importer_sp->InstallMapCompleter(&ast_context, *this); + m_ast_importer_sp->InstallMapCompleter(m_ast_context, *this); } } @@ -775,7 +776,7 @@ void ClangASTSource::FindExternalVisibleDecls(NameSearchContext &context) { } clang::Sema *ClangASTSource::getSema() { - return ClangASTContext::GetASTContext(m_ast_context)->getSema(); + return m_clang_ast_context->getSema(); } bool ClangASTSource::IgnoreName(const ConstString name, @@ -2058,8 +2059,7 @@ CompilerType ClangASTSource::GuardedCopyType(const CompilerType &src_type) { // seems to be generating bad types on occasion. return CompilerType(); - return CompilerType(ClangASTContext::GetASTContext(m_ast_context), - copied_qual_type.getAsOpaquePtr()); + return CompilerType(m_clang_ast_context, copied_qual_type.getAsOpaquePtr()); } clang::NamedDecl *NameSearchContext::AddVarDecl(const CompilerType &type) { @@ -2186,10 +2186,9 @@ clang::NamedDecl *NameSearchContext::AddGenericFunDecl() { ArrayRef(), // argument types proto_info)); - return AddFunDecl( - CompilerType(ClangASTContext::GetASTContext(m_ast_source.m_ast_context), - generic_function_type.getAsOpaquePtr()), - true); + return AddFunDecl(CompilerType(m_ast_source.m_clang_ast_context, + generic_function_type.getAsOpaquePtr()), + true); } clang::NamedDecl * diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.h index d8e784f49b10e..194233e4a028e 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.h +++ 
b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.h @@ -57,7 +57,7 @@ class ClangASTSource : public ClangExternalASTSourceCommon, } void MaterializeVisibleDecls(const clang::DeclContext *DC) { return; } - void InstallASTContext(clang::ASTContext &ast_context, + void InstallASTContext(ClangASTContext &ast_context, clang::FileManager &file_manager, bool is_shared_context = false); @@ -408,6 +408,8 @@ class ClangASTSource : public ClangExternalASTSourceCommon, const lldb::TargetSP m_target; /// The AST context requests are coming in for. clang::ASTContext *m_ast_context; + /// The ClangASTContext for m_ast_context. + ClangASTContext *m_clang_ast_context; /// The file manager paired with the AST context. clang::FileManager *m_file_manager; /// The target's AST importer. diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp index 4966ac1640feb..b33547529debb 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp @@ -1076,12 +1076,9 @@ void ClangExpressionDeclMap::LookupLocalVarNamespace( if (!frame_ast) return; - ClangASTContext *map_ast = ClangASTContext::GetASTContext(m_ast_context); - if (!map_ast) - return; - - clang::NamespaceDecl *namespace_decl = map_ast->GetUniqueNamespaceDeclaration( - g_lldb_local_vars_namespace_cstr, nullptr); + clang::NamespaceDecl *namespace_decl = + m_clang_ast_context->GetUniqueNamespaceDeclaration( + g_lldb_local_vars_namespace_cstr, nullptr); if (!namespace_decl) return; @@ -1724,8 +1721,7 @@ void ClangExpressionDeclMap::AddOneGenericVariable(NameSearchContext &context, TypeFromUser user_type(scratch_ast_context->GetBasicType(eBasicTypeVoid) .GetPointerType() .GetLValueReferenceType()); - ClangASTContext *own_context = ClangASTContext::GetASTContext(m_ast_context); - TypeFromParser 
parser_type(own_context->GetBasicType(eBasicTypeVoid) + TypeFromParser parser_type(m_clang_ast_context->GetBasicType(eBasicTypeVoid) .GetPointerType() .GetLValueReferenceType()); NamedDecl *var_decl = context.AddVarDecl(parser_type); @@ -2003,9 +1999,8 @@ void ClangExpressionDeclMap::AddThisType(NameSearchContext &context, if (copied_clang_type.IsAggregateType() && copied_clang_type.GetCompleteType()) { - ClangASTContext *own_context = - ClangASTContext::GetASTContext(m_ast_context); - CompilerType void_clang_type = own_context->GetBasicType(eBasicTypeVoid); + CompilerType void_clang_type = + m_clang_ast_context->GetBasicType(eBasicTypeVoid); CompilerType void_ptr_clang_type = void_clang_type.GetPointerType(); CompilerType method_type = ClangASTContext::CreateFunctionType( @@ -2018,12 +2013,10 @@ void ClangExpressionDeclMap::AddThisType(NameSearchContext &context, const bool is_attr_used = true; const bool is_artificial = false; - CXXMethodDecl *method_decl = - ClangASTContext::GetASTContext(m_ast_context) - ->AddMethodToCXXRecordType( - copied_clang_type.GetOpaqueQualType(), "$__lldb_expr", nullptr, - method_type, lldb::eAccessPublic, is_virtual, is_static, - is_inline, is_explicit, is_attr_used, is_artificial); + CXXMethodDecl *method_decl = m_clang_ast_context->AddMethodToCXXRecordType( + copied_clang_type.GetOpaqueQualType(), "$__lldb_expr", nullptr, + method_type, lldb::eAccessPublic, is_virtual, is_static, is_inline, + is_explicit, is_attr_used, is_artificial); LLDB_LOG(log, " CEDM::AddThisType Added function $__lldb_expr " diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp index a0f966ddd5111..15b242a8b87ee 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp @@ -997,7 +997,7 @@ ClangExpressionParser::ParseInternal(DiagnosticManager &diagnostic_manager, } 
else { ast_context.setExternalSource(ast_source); } - decl_map->InstallASTContext(ast_context, m_compiler->getFileManager()); + decl_map->InstallASTContext(*m_ast_context, m_compiler->getFileManager()); } // Check that the ASTReader is properly attached to ASTContext and Sema. diff --git a/lldb/source/Symbol/ClangASTContext.cpp b/lldb/source/Symbol/ClangASTContext.cpp index adb8d57a74f65..9988f06156518 100644 --- a/lldb/source/Symbol/ClangASTContext.cpp +++ b/lldb/source/Symbol/ClangASTContext.cpp @@ -598,7 +598,7 @@ lldb::TypeSystemSP ClangASTContext::CreateInstance(lldb::LanguageType language, new ClangASTSource(target->shared_from_this())); lldbassert(ast_sp->getFileManager()); ast_sp->m_scratch_ast_source_up->InstallASTContext( - *ast_sp->getASTContext(), *ast_sp->getFileManager(), true); + *ast_sp, *ast_sp->getFileManager(), true); llvm::IntrusiveRefCntPtr proxy_ast_source( ast_sp->m_scratch_ast_source_up->CreateProxy()); ast_sp->SetExternalSource(proxy_ast_source); From c214c92f3be7c15abc458f23c7be05a5790e6aed Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Fri, 29 Nov 2019 13:43:23 +0100 Subject: [PATCH 225/591] [lldb][NFC] Remove ClangASTContext::GetBuiltinTypeForEncodingAndBitSize overload --- lldb/include/lldb/Symbol/ClangASTContext.h | 3 - .../Clang/ClangExpressionDeclMap.cpp | 4 +- lldb/source/Symbol/ClangASTContext.cpp | 55 +++++++------------ lldb/unittests/Symbol/TestClangASTContext.cpp | 38 ++++++------- 4 files changed, 41 insertions(+), 59 deletions(-) diff --git a/lldb/include/lldb/Symbol/ClangASTContext.h b/lldb/include/lldb/Symbol/ClangASTContext.h index 7018f3b71b4f4..a55307ef632da 100644 --- a/lldb/include/lldb/Symbol/ClangASTContext.h +++ b/lldb/include/lldb/Symbol/ClangASTContext.h @@ -150,9 +150,6 @@ class ClangASTContext : public TypeSystem { CompilerType GetBuiltinTypeForEncodingAndBitSize(lldb::Encoding encoding, size_t bit_size) override; - static CompilerType GetBuiltinTypeForEncodingAndBitSize( - clang::ASTContext *ast, 
lldb::Encoding encoding, uint32_t bit_size); - CompilerType GetBasicType(lldb::BasicType type); CompilerType GetBasicType(ConstString name); diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp index b33547529debb..22966e8023d68 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp @@ -1762,8 +1762,8 @@ void ClangExpressionDeclMap::AddOneRegister(NameSearchContext &context, Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS)); CompilerType clang_type = - ClangASTContext::GetBuiltinTypeForEncodingAndBitSize( - m_ast_context, reg_info->encoding, reg_info->byte_size * 8); + m_clang_ast_context->GetBuiltinTypeForEncodingAndBitSize( + reg_info->encoding, reg_info->byte_size * 8); if (!clang_type) { LLDB_LOGF(log, " Tried to add a type for %s, but couldn't get one", diff --git a/lldb/source/Symbol/ClangASTContext.cpp b/lldb/source/Symbol/ClangASTContext.cpp index 9988f06156518..8428dfe8c4fa3 100644 --- a/lldb/source/Symbol/ClangASTContext.cpp +++ b/lldb/source/Symbol/ClangASTContext.cpp @@ -843,77 +843,62 @@ static inline bool QualTypeMatchesBitSize(const uint64_t bit_size, CompilerType ClangASTContext::GetBuiltinTypeForEncodingAndBitSize(Encoding encoding, size_t bit_size) { - return ClangASTContext::GetBuiltinTypeForEncodingAndBitSize( - getASTContext(), encoding, bit_size); -} - -CompilerType ClangASTContext::GetBuiltinTypeForEncodingAndBitSize( - ASTContext *ast, Encoding encoding, uint32_t bit_size) { - auto *clang_ast_context = ClangASTContext::GetASTContext(ast); + ASTContext *ast = this->getASTContext(); if (!ast) return CompilerType(); switch (encoding) { case eEncodingInvalid: if (QualTypeMatchesBitSize(bit_size, ast, ast->VoidPtrTy)) - return CompilerType(clang_ast_context, ast->VoidPtrTy.getAsOpaquePtr()); + return CompilerType(this, 
ast->VoidPtrTy.getAsOpaquePtr()); break; case eEncodingUint: if (QualTypeMatchesBitSize(bit_size, ast, ast->UnsignedCharTy)) - return CompilerType(clang_ast_context, - ast->UnsignedCharTy.getAsOpaquePtr()); + return CompilerType(this, ast->UnsignedCharTy.getAsOpaquePtr()); if (QualTypeMatchesBitSize(bit_size, ast, ast->UnsignedShortTy)) - return CompilerType(clang_ast_context, - ast->UnsignedShortTy.getAsOpaquePtr()); + return CompilerType(this, ast->UnsignedShortTy.getAsOpaquePtr()); if (QualTypeMatchesBitSize(bit_size, ast, ast->UnsignedIntTy)) - return CompilerType(clang_ast_context, - ast->UnsignedIntTy.getAsOpaquePtr()); + return CompilerType(this, ast->UnsignedIntTy.getAsOpaquePtr()); if (QualTypeMatchesBitSize(bit_size, ast, ast->UnsignedLongTy)) - return CompilerType(clang_ast_context, - ast->UnsignedLongTy.getAsOpaquePtr()); + return CompilerType(this, ast->UnsignedLongTy.getAsOpaquePtr()); if (QualTypeMatchesBitSize(bit_size, ast, ast->UnsignedLongLongTy)) - return CompilerType(clang_ast_context, - ast->UnsignedLongLongTy.getAsOpaquePtr()); + return CompilerType(this, ast->UnsignedLongLongTy.getAsOpaquePtr()); if (QualTypeMatchesBitSize(bit_size, ast, ast->UnsignedInt128Ty)) - return CompilerType(clang_ast_context, - ast->UnsignedInt128Ty.getAsOpaquePtr()); + return CompilerType(this, ast->UnsignedInt128Ty.getAsOpaquePtr()); break; case eEncodingSint: if (QualTypeMatchesBitSize(bit_size, ast, ast->SignedCharTy)) - return CompilerType(clang_ast_context, - ast->SignedCharTy.getAsOpaquePtr()); + return CompilerType(this, ast->SignedCharTy.getAsOpaquePtr()); if (QualTypeMatchesBitSize(bit_size, ast, ast->ShortTy)) - return CompilerType(clang_ast_context, ast->ShortTy.getAsOpaquePtr()); + return CompilerType(this, ast->ShortTy.getAsOpaquePtr()); if (QualTypeMatchesBitSize(bit_size, ast, ast->IntTy)) - return CompilerType(clang_ast_context, ast->IntTy.getAsOpaquePtr()); + return CompilerType(this, ast->IntTy.getAsOpaquePtr()); if 
(QualTypeMatchesBitSize(bit_size, ast, ast->LongTy)) - return CompilerType(clang_ast_context, ast->LongTy.getAsOpaquePtr()); + return CompilerType(this, ast->LongTy.getAsOpaquePtr()); if (QualTypeMatchesBitSize(bit_size, ast, ast->LongLongTy)) - return CompilerType(clang_ast_context, ast->LongLongTy.getAsOpaquePtr()); + return CompilerType(this, ast->LongLongTy.getAsOpaquePtr()); if (QualTypeMatchesBitSize(bit_size, ast, ast->Int128Ty)) - return CompilerType(clang_ast_context, ast->Int128Ty.getAsOpaquePtr()); + return CompilerType(this, ast->Int128Ty.getAsOpaquePtr()); break; case eEncodingIEEE754: if (QualTypeMatchesBitSize(bit_size, ast, ast->FloatTy)) - return CompilerType(clang_ast_context, ast->FloatTy.getAsOpaquePtr()); + return CompilerType(this, ast->FloatTy.getAsOpaquePtr()); if (QualTypeMatchesBitSize(bit_size, ast, ast->DoubleTy)) - return CompilerType(clang_ast_context, ast->DoubleTy.getAsOpaquePtr()); + return CompilerType(this, ast->DoubleTy.getAsOpaquePtr()); if (QualTypeMatchesBitSize(bit_size, ast, ast->LongDoubleTy)) - return CompilerType(clang_ast_context, - ast->LongDoubleTy.getAsOpaquePtr()); + return CompilerType(this, ast->LongDoubleTy.getAsOpaquePtr()); if (QualTypeMatchesBitSize(bit_size, ast, ast->HalfTy)) - return CompilerType(clang_ast_context, ast->HalfTy.getAsOpaquePtr()); + return CompilerType(this, ast->HalfTy.getAsOpaquePtr()); break; case eEncodingVector: // Sanity check that bit_size is a multiple of 8's. 
if (bit_size && !(bit_size & 0x7u)) return CompilerType( - clang_ast_context, - ast->getExtVectorType(ast->UnsignedCharTy, bit_size / 8) - .getAsOpaquePtr()); + this, ast->getExtVectorType(ast->UnsignedCharTy, bit_size / 8) + .getAsOpaquePtr()); break; } diff --git a/lldb/unittests/Symbol/TestClangASTContext.cpp b/lldb/unittests/Symbol/TestClangASTContext.cpp index 44a824636cf73..8fb24acc7a6a1 100644 --- a/lldb/unittests/Symbol/TestClangASTContext.cpp +++ b/lldb/unittests/Symbol/TestClangASTContext.cpp @@ -169,10 +169,12 @@ TEST_F(TestClangASTContext, TestGetBasicTypeFromName) { EXPECT_EQ(GetBasicQualType(eBasicTypeNullPtr), GetBasicQualType("nullptr")); } -void VerifyEncodingAndBitSize(clang::ASTContext *context, +void VerifyEncodingAndBitSize(ClangASTContext &clang_context, lldb::Encoding encoding, unsigned int bit_size) { - CompilerType type = ClangASTContext::GetBuiltinTypeForEncodingAndBitSize( - context, encoding, bit_size); + clang::ASTContext *context = clang_context.getASTContext(); + + CompilerType type = + clang_context.GetBuiltinTypeForEncodingAndBitSize(encoding, bit_size); EXPECT_TRUE(type.IsValid()); QualType qtype = ClangUtil::GetQualType(type); @@ -206,8 +208,6 @@ void VerifyEncodingAndBitSize(clang::ASTContext *context, } TEST_F(TestClangASTContext, TestBuiltinTypeForEncodingAndBitSize) { - clang::ASTContext *context = m_ast->getASTContext(); - // Make sure we can get types of every possible size in every possible // encoding. // We can't make any guarantee about which specific type we get, because the @@ -215,20 +215,20 @@ TEST_F(TestClangASTContext, TestBuiltinTypeForEncodingAndBitSize) { // isn't that specific. We only need to make sure the compiler hands us some // type that // is both a builtin type and matches the requested bit size. 
- VerifyEncodingAndBitSize(context, eEncodingSint, 8); - VerifyEncodingAndBitSize(context, eEncodingSint, 16); - VerifyEncodingAndBitSize(context, eEncodingSint, 32); - VerifyEncodingAndBitSize(context, eEncodingSint, 64); - VerifyEncodingAndBitSize(context, eEncodingSint, 128); - - VerifyEncodingAndBitSize(context, eEncodingUint, 8); - VerifyEncodingAndBitSize(context, eEncodingUint, 16); - VerifyEncodingAndBitSize(context, eEncodingUint, 32); - VerifyEncodingAndBitSize(context, eEncodingUint, 64); - VerifyEncodingAndBitSize(context, eEncodingUint, 128); - - VerifyEncodingAndBitSize(context, eEncodingIEEE754, 32); - VerifyEncodingAndBitSize(context, eEncodingIEEE754, 64); + VerifyEncodingAndBitSize(*m_ast, eEncodingSint, 8); + VerifyEncodingAndBitSize(*m_ast, eEncodingSint, 16); + VerifyEncodingAndBitSize(*m_ast, eEncodingSint, 32); + VerifyEncodingAndBitSize(*m_ast, eEncodingSint, 64); + VerifyEncodingAndBitSize(*m_ast, eEncodingSint, 128); + + VerifyEncodingAndBitSize(*m_ast, eEncodingUint, 8); + VerifyEncodingAndBitSize(*m_ast, eEncodingUint, 16); + VerifyEncodingAndBitSize(*m_ast, eEncodingUint, 32); + VerifyEncodingAndBitSize(*m_ast, eEncodingUint, 64); + VerifyEncodingAndBitSize(*m_ast, eEncodingUint, 128); + + VerifyEncodingAndBitSize(*m_ast, eEncodingIEEE754, 32); + VerifyEncodingAndBitSize(*m_ast, eEncodingIEEE754, 64); } TEST_F(TestClangASTContext, TestIsClangType) { From 8059188c45f049b52b779d6684ea78b6ef8b168c Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Fri, 29 Nov 2019 14:08:01 +0100 Subject: [PATCH 226/591] [lldb][NFC] Remove unused ClangASTContext::GetBasicType(ConstString) --- lldb/include/lldb/Symbol/ClangASTContext.h | 2 -- lldb/source/Symbol/ClangASTContext.cpp | 5 ----- 2 files changed, 7 deletions(-) diff --git a/lldb/include/lldb/Symbol/ClangASTContext.h b/lldb/include/lldb/Symbol/ClangASTContext.h index a55307ef632da..b2c284282f11e 100644 --- a/lldb/include/lldb/Symbol/ClangASTContext.h +++ 
b/lldb/include/lldb/Symbol/ClangASTContext.h @@ -152,8 +152,6 @@ class ClangASTContext : public TypeSystem { CompilerType GetBasicType(lldb::BasicType type); - CompilerType GetBasicType(ConstString name); - static lldb::BasicType GetBasicTypeEnumeration(ConstString name); CompilerType GetBuiltinTypeForDWARFEncodingAndBitSize(const char *type_name, diff --git a/lldb/source/Symbol/ClangASTContext.cpp b/lldb/source/Symbol/ClangASTContext.cpp index 8428dfe8c4fa3..e683a0a9f4bec 100644 --- a/lldb/source/Symbol/ClangASTContext.cpp +++ b/lldb/source/Symbol/ClangASTContext.cpp @@ -971,11 +971,6 @@ ClangASTContext::GetBasicTypeEnumeration(ConstString name) { return eBasicTypeInvalid; } -CompilerType ClangASTContext::GetBasicType(ConstString name) { - lldb::BasicType basic_type = ClangASTContext::GetBasicTypeEnumeration(name); - return GetBasicType(basic_type); -} - uint32_t ClangASTContext::GetPointerByteSize() { if (m_pointer_byte_size == 0) if (auto size = GetBasicType(lldb::eBasicTypeVoid) From 45c843de4eb8a0d2aa42543b4fcfefcbca4191d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Wed, 16 Oct 2019 00:08:28 +0300 Subject: [PATCH 227/591] [LLDB] [ARM] Use r11 as frame pointer on Windows on ARM Extend EmulateMOVRdRm to identify "mov r11, sp" in thumb mode as setting the frame pointer, if r11 is the frame pointer register. 
Differential Revision: https://reviews.llvm.org/D70797 --- .../Instruction/ARM/EmulateInstructionARM.cpp | 8 +- .../Windows/Inputs/arm-fp-unwind.dmp.yaml | 37 ++++++++ .../Windows/Inputs/arm-fp-unwind.exe.yaml | 92 +++++++++++++++++++ .../Shell/Minidump/Windows/arm-fp-unwind.test | 17 ++++ 4 files changed, 152 insertions(+), 2 deletions(-) create mode 100644 lldb/test/Shell/Minidump/Windows/Inputs/arm-fp-unwind.dmp.yaml create mode 100644 lldb/test/Shell/Minidump/Windows/Inputs/arm-fp-unwind.exe.yaml create mode 100644 lldb/test/Shell/Minidump/Windows/arm-fp-unwind.test diff --git a/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp b/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp index 19a987b0f0042..ff142e6f35ff2 100644 --- a/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp +++ b/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp @@ -850,6 +850,7 @@ uint32_t EmulateInstructionARM::GetFramePointerRegisterNumber() const { /* On Apple iOS et al, the frame pointer register is always r7. * Typically on other ARM systems, thumb code uses r7; arm code uses r11. + * Windows on ARM, which is in thumb mode, uses r11 though. */ uint32_t fp_regnum = 11; @@ -857,7 +858,7 @@ uint32_t EmulateInstructionARM::GetFramePointerRegisterNumber() const { if (is_apple) fp_regnum = 7; - if (m_opcode_mode == eModeThumb) + if (m_opcode_mode == eModeThumb && !m_arch.GetTriple().isOSWindows()) fp_regnum = 7; return fp_regnum; @@ -879,6 +880,7 @@ uint32_t EmulateInstructionARM::GetFramePointerDWARFRegisterNumber() const { /* On Apple iOS et al, the frame pointer register is always r7. * Typically on other ARM systems, thumb code uses r7; arm code uses r11. + * Windows on ARM, which is in thumb mode, uses r11 though. 
*/ uint32_t fp_regnum = dwarf_r11; @@ -886,7 +888,7 @@ uint32_t EmulateInstructionARM::GetFramePointerDWARFRegisterNumber() const { if (is_apple) fp_regnum = dwarf_r7; - if (m_opcode_mode == eModeThumb) + if (m_opcode_mode == eModeThumb && !m_arch.GetTriple().isOSWindows()) fp_regnum = dwarf_r7; return fp_regnum; @@ -1343,6 +1345,8 @@ bool EmulateInstructionARM::EmulateMOVRdRm(const uint32_t opcode, EmulateInstruction::Context context; if (Rd == 13) context.type = EmulateInstruction::eContextAdjustStackPointer; + else if (Rd == GetFramePointerRegisterNumber() && Rm == 13) + context.type = EmulateInstruction::eContextSetFramePointer; else context.type = EmulateInstruction::eContextRegisterPlusOffset; RegisterInfo dwarf_reg; diff --git a/lldb/test/Shell/Minidump/Windows/Inputs/arm-fp-unwind.dmp.yaml b/lldb/test/Shell/Minidump/Windows/Inputs/arm-fp-unwind.dmp.yaml new file mode 100644 index 0000000000000..330a761d88b4c --- /dev/null +++ b/lldb/test/Shell/Minidump/Windows/Inputs/arm-fp-unwind.dmp.yaml @@ -0,0 +1,37 @@ +--- !minidump +Version: 0xA0BAA793 +Flags: 0x0000000000000800 +Streams: + - Type: ThreadList + Threads: + - Thread Id: 0x00004034 + Suspend Count: 0x00000001 + Priority Class: 0x00000020 + Environment Block: 0x00000000007E6000 + Context: 0000000000000000 + Stack: + Start of Memory Range: 0x00000000008FF758 + Content: 00000000000000 + - Type: ModuleList + Modules: + - Base of Image: 0x0000000000C70000 + Size of Image: 0x00002000 + Time Date Stamp: 1574942531 + Module Name: 'arm-fp-unwind.exe' + CodeView Record: '' + Reserved0: 0x0000000000008140 + - Type: SystemInfo + Processor Arch: ARM + Processor Level: 2049 + Processor Revision: 2564 + Number of Processors: 8 + Product type: 1 + Major Version: 10 + Build Number: 18362 + Platform ID: Win32NT + Suite Mask: 0x0100 + CPU: + CPUID: 0xEB8C1004 + - Type: MiscInfo + Content: 
54050000F7010000183800002EB9DF5D00000000000000006C0700002B0100006C0700000400000003000000002000000D000000000000000100000088FFFFFF46004C00450020005300740061006E0064006100720064002000540069006D00650000000000000000000000000000000000000000000000000000000000000000000A000000050004000000000000000000000046004C00450020004400610079006C0069006700680074002000540069006D00650000000000000000000000000000000000000000000000000000000000000000000300000005000300000000000000C4FFFFFF310038003300360032002E003200330039002E00610072006D006600720065002E0031003900680031005F00720065006C0065006100730065005F007300760063005F00700072006F00640031002E003100390030003600320038002D0031003600340031000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000064006200670063006F00720065002E0077006F0061002C00310030002E0030002E00310038003300360032002E0031000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +... diff --git a/lldb/test/Shell/Minidump/Windows/Inputs/arm-fp-unwind.exe.yaml b/lldb/test/Shell/Minidump/Windows/Inputs/arm-fp-unwind.exe.yaml new file mode 100644 index 0000000000000..f3229060635f2 --- /dev/null +++ b/lldb/test/Shell/Minidump/Windows/Inputs/arm-fp-unwind.exe.yaml @@ -0,0 +1,92 @@ +--- !COFF +OptionalHeader: + AddressOfEntryPoint: 4097 + ImageBase: 4194304 + SectionAlignment: 4096 + FileAlignment: 512 + MajorOperatingSystemVersion: 6 + MinorOperatingSystemVersion: 0 + MajorImageVersion: 0 + MinorImageVersion: 0 + MajorSubsystemVersion: 6 + MinorSubsystemVersion: 0 + Subsystem: IMAGE_SUBSYSTEM_WINDOWS_CUI + DLLCharacteristics: [ IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE, IMAGE_DLL_CHARACTERISTICS_NX_COMPAT, IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE ] + SizeOfStackReserve: 1048576 + SizeOfStackCommit: 4096 + SizeOfHeapReserve: 1048576 + SizeOfHeapCommit: 4096 + ExportTable: + RelativeVirtualAddress: 0 + Size: 0 + ImportTable: + RelativeVirtualAddress: 0 + Size: 0 + ResourceTable: + RelativeVirtualAddress: 0 + Size: 0 + ExceptionTable: + RelativeVirtualAddress: 0 + Size: 0 + CertificateTable: + RelativeVirtualAddress: 0 + Size: 0 + BaseRelocationTable: + RelativeVirtualAddress: 0 + Size: 0 + Debug: + RelativeVirtualAddress: 0 + 
Size: 0 + Architecture: + RelativeVirtualAddress: 0 + Size: 0 + GlobalPtr: + RelativeVirtualAddress: 0 + Size: 0 + TlsTable: + RelativeVirtualAddress: 0 + Size: 0 + LoadConfigTable: + RelativeVirtualAddress: 0 + Size: 0 + BoundImport: + RelativeVirtualAddress: 0 + Size: 0 + IAT: + RelativeVirtualAddress: 0 + Size: 0 + DelayImportDescriptor: + RelativeVirtualAddress: 0 + Size: 0 + ClrRuntimeHeader: + RelativeVirtualAddress: 0 + Size: 0 +header: + Machine: IMAGE_FILE_MACHINE_ARMNT + Characteristics: [ IMAGE_FILE_EXECUTABLE_IMAGE, IMAGE_FILE_32BIT_MACHINE ] +sections: + - Name: .text + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + VirtualAddress: 4096 + VirtualSize: 38 + SectionData: 2DE90048EB46ADF5007D684600F004F80DF5007DBDE8008800BE01784278415C805C08447047 +symbols: + - Name: .text + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + - Name: entry + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_FUNCTION + StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: other + Value: 24 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_FUNCTION + StorageClass: IMAGE_SYM_CLASS_EXTERNAL +... diff --git a/lldb/test/Shell/Minidump/Windows/arm-fp-unwind.test b/lldb/test/Shell/Minidump/Windows/arm-fp-unwind.test new file mode 100644 index 0000000000000..35ea7c8a9de0e --- /dev/null +++ b/lldb/test/Shell/Minidump/Windows/arm-fp-unwind.test @@ -0,0 +1,17 @@ +Test that unwind plans use the frame pointer register correctly. 
+ +REQUIRES: arm + +RUN: yaml2obj %S/Inputs/arm-fp-unwind.exe.yaml > %T/arm-fp-unwind.exe +RUN: yaml2obj %S/Inputs/arm-fp-unwind.dmp.yaml > %T/arm-fp-unwind.dmp +RUN: %lldb -O "settings set target.exec-search-paths %T" \ +RUN: -c %T/arm-fp-unwind.dmp -o "image show-unwind -a 0x00c71010" -b \ +RUN: | FileCheck %s + +CHECK: Assembly language inspection UnwindPlan: +CHECK-NEXT: This UnwindPlan originally sourced from EmulateInstructionARM +CHECK-NEXT: This UnwindPlan is sourced from the compiler: no. +CHECK-NEXT: This UnwindPlan is valid at all instruction locations: yes. +CHECK-NEXT: row[0]: 0: CFA=sp +0 => +CHECK-NEXT: row[1]: 4: CFA=sp +8 => fp=[CFA-8] lr=[CFA-4] +CHECK-NEXT: row[2]: 6: CFA=fp +8 => fp=[CFA-8] lr=[CFA-4] From 19daa21f841ad45290c923689ee3d25198651a4c Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Wed, 20 Nov 2019 23:25:17 +0100 Subject: [PATCH 228/591] [clangd] Rethink how SelectionTree deals with macros and #includes. Summary: The exclusive-claim model is successful at resolving conflicts over tokens between parent/child or siblings. However claims at the spelled-token level do the wrong thing for macro expansions, where siblings can be equally associated with the macro invocation. Moreover, any model that only uses the endpoints in a range can fail when a macro invocation occurs inside the node. To address this, we use the existing TokenBuffer in more depth. Claims are expressed in terms of expanded tokens, so there is no need to worry about macros, includes etc. Once we know which expanded tokens were claimed, they are mapped onto spelled tokens for hit-testing. This mapping is fairly flexible, currently the handling of macros is pretty simple (map macro args onto spellings, other macro expansions onto the macro name token). This mapping is in principle token-by-token for correctness (though there's some batching for performance). 
The aggregation of the selection enum is now more principled as we need to be able to aggregate several hit-test results together. For simplicity i removed the ability to determine selectedness of TUDecl. (That was originally implemented in 90a5bf92ff97b1, but doesn't seem to be very important or worth the complexity any longer). The expandedTokens(SourceLocation) helper could be added locally, but seems to make sense on TokenBuffer. Fixes https://github.com/clangd/clangd/issues/202 Fixes https://github.com/clangd/clangd/issues/126 Reviewers: hokein Subscribers: MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits, ilya-biryukov Tags: #clang Differential Revision: https://reviews.llvm.org/D70512 --- clang-tools-extra/clangd/Selection.cpp | 414 ++++++++++++------ clang-tools-extra/clangd/Selection.h | 2 +- .../clangd/unittests/SelectionTests.cpp | 66 ++- .../clangd/unittests/TweakTests.cpp | 26 +- clang/include/clang/Tooling/Syntax/Tokens.h | 5 + clang/lib/Tooling/Syntax/Tokens.cpp | 16 + clang/unittests/Tooling/Syntax/TokensTest.cpp | 15 + 7 files changed, 391 insertions(+), 153 deletions(-) diff --git a/clang-tools-extra/clangd/Selection.cpp b/clang-tools-extra/clangd/Selection.cpp index 5b29b916b33ce..54b182b3c7060 100644 --- a/clang-tools-extra/clangd/Selection.cpp +++ b/clang-tools-extra/clangd/Selection.cpp @@ -34,95 +34,283 @@ namespace { using Node = SelectionTree::Node; using ast_type_traits::DynTypedNode; -// Identifies which tokens are selected, and evaluates claims of source ranges -// by AST nodes. Tokens may be claimed only once: first-come, first-served. -class SelectedTokens { +// An IntervalSet maintains a set of disjoint subranges of an array. +// +// Initially, it contains the entire array. +// [-----------------------------------------------------------] +// +// When a range is erased(), it will typically split the array in two. 
+// Claim: [--------------------] +// after: [----------------] [-------------------] +// +// erase() returns the segments actually erased. Given the state above: +// Claim: [---------------------------------------] +// Out: [---------] [------] +// After: [-----] [-----------] +// +// It is used to track (expanded) tokens not yet associated with an AST node. +// On traversing an AST node, its token range is erased from the unclaimed set. +// The tokens actually removed are associated with that node, and hit-tested +// against the selection to determine whether the node is selected. +template +class IntervalSet { +public: + IntervalSet(llvm::ArrayRef Range) : UnclaimedRanges(&rangeLess) { + UnclaimedRanges.insert(Range); + } + + // Removes the elements of Claim from the set, modifying or removing ranges + // that overlap it. + // Returns the continuous subranges of Claim that were actually removed. + llvm::SmallVector, 4> erase(llvm::ArrayRef Claim) { + llvm::SmallVector, 4> Out; + if (Claim.empty()) + return Out; + // equal_range finds overlapping ranges, because of how we chose <. + auto Overlap = UnclaimedRanges.equal_range(Claim); + if (Overlap.first == Overlap.second) + return Out; + + // General case: + // Claim: [-----------------] + // UnclaimedRanges: [-A-] [-B-] [-C-] [-D-] [-E-] [-F-] [-G-] + // Overlap: ^first ^second + // Ranges C and D are fully included. Ranges B and E must be trimmed. + + // First, copy all overlapping ranges into the output. 
+ auto OutFirst = Out.insert(Out.end(), Overlap.first, Overlap.second); + // If any of the overlapping ranges were sliced by the claim, split them: + // - restrict the returned range to the claimed part + // - save the unclaimed part so it can be reinserted + llvm::ArrayRef RemainingHead, RemainingTail; + if (Claim.begin() > OutFirst->begin()) { + RemainingHead = {OutFirst->begin(), Claim.begin()}; + *OutFirst = {Claim.begin(), OutFirst->end()}; + } + if (Claim.end() < Out.back().end()) { + RemainingTail = {Claim.end(), Out.back().end()}; + Out.back() = {Out.back().begin(), Claim.end()}; + } + + // Erase all the overlapping ranges (invalidating all iterators). + UnclaimedRanges.erase(Overlap.first, Overlap.second); + // Reinsert ranges that were merely trimmed. + if (!RemainingHead.empty()) + UnclaimedRanges.insert(RemainingHead); + if (!RemainingTail.empty()) + UnclaimedRanges.insert(RemainingTail); + + return Out; + } + +private: + using TokenRange = llvm::ArrayRef; + // Given that the ranges we insert are disjoint, there are several ways to + // legally define range < range. + // We choose to define it so overlapping ranges compare equal. + static bool rangeLess(llvm::ArrayRef L, llvm::ArrayRef R) { + return L.end() <= R.begin(); + } + + // Disjoint sorted unclaimed ranges of expanded tokens. + std::set, decltype(&rangeLess)> UnclaimedRanges; +}; + +// Sentinel value for the selectedness of a node where we've seen no tokens yet. +// This resolves to Unselected if no tokens are ever seen. +// But Unselected + Complete -> Partial, while NoTokens + Complete --> Complete. +// This value is never exposed publicly. +constexpr SelectionTree::Selection NoTokens = + static_cast( + static_cast(SelectionTree::Complete + 1)); + +// Nodes start with NoTokens, and then use this function to aggregate the +// selectedness as more tokens are found. 
+void update(SelectionTree::Selection &Result, SelectionTree::Selection New) { + if (New == NoTokens) + return; + if (Result == NoTokens) + Result = New; + else if (Result != New) + // Can only be completely selected (or unselected) if all tokens are. + Result = SelectionTree::Partial; +} + + +// SelectionTester can determine whether a range of tokens from the PP-expanded +// stream (corresponding to an AST node) is considered selected. +// +// When the tokens result from macro expansions, the appropriate tokens in the +// main file are examined (macro invocation or args). Similarly for #includes. +// +// It tests each token in the range (not just the endpoints) as contiguous +// expanded tokens may not have contiguous spellings (with macros). +// +// Non-token text, and tokens not modeled in the AST (comments, semicolons) +// are ignored when determining selectedness. +class SelectionTester { public: - SelectedTokens(llvm::ArrayRef Spelled, const SourceManager &SM, - unsigned SelBegin, unsigned SelEnd) - : SelBegin(SelBegin), SelEnd(SelEnd) { - // Extract bounds and selected-ness for all tokens spelled in the file. - Tokens.reserve(Spelled.size()); - for (const auto& Tok : Spelled) { + // The selection is offsets [SelBegin, SelEnd) in SelFile. + SelectionTester(const syntax::TokenBuffer &Buf, FileID SelFile, + unsigned SelBegin, unsigned SelEnd, const SourceManager &SM) + : SelFile(SelFile), SM(SM) { + // Find all tokens (partially) selected in the file. + auto AllSpelledTokens = Buf.spelledTokens(SelFile); + const syntax::Token *SelFirst = + llvm::partition_point(AllSpelledTokens, [&](const syntax::Token &Tok) { + return SM.getFileOffset(Tok.endLocation()) <= SelBegin; + }); + const syntax::Token *SelLimit = std::partition_point( + SelFirst, AllSpelledTokens.end(), [&](const syntax::Token &Tok) { + return SM.getFileOffset(Tok.location()) < SelEnd; + }); + // Precompute selectedness and offset for selected spelled tokens. 
+ for (const syntax::Token *T = SelFirst; T < SelLimit; ++T) { // As well as comments, don't count semicolons as real tokens. // They're not properly claimed as expr-statement is missing from the AST. - if (Tok.kind() == tok::comment || Tok.kind() == tok::semi) + if (T->kind() == tok::comment || T->kind() == tok::semi) continue; - - Tokens.emplace_back(); - TokInfo &S = Tokens.back(); - S.StartOffset = SM.getFileOffset(Tok.location()); - S.EndOffset = S.StartOffset + Tok.length(); - if (S.StartOffset >= SelBegin && S.EndOffset <= SelEnd) + SpelledTokens.emplace_back(); + Tok &S = SpelledTokens.back(); + S.Offset = SM.getFileOffset(T->location()); + if (S.Offset >= SelBegin && S.Offset + T->length() <= SelEnd) S.Selected = SelectionTree::Complete; - else if (S.EndOffset > SelBegin && S.StartOffset < SelEnd) - S.Selected = SelectionTree::Partial; else - S.Selected = SelectionTree::Unselected; - S.Claimed = false; + S.Selected = SelectionTree::Partial; } } - // Associates any tokens overlapping [Begin, End) with an AST node. - // Tokens that were already claimed by another AST node are not claimed again. - // Updates Result if the node is selected in the sense of SelectionTree. - void claim(unsigned Begin, unsigned End, SelectionTree::Selection &Result) { - assert(Begin <= End); + // Test whether a consecutive range of tokens is selected. + // The tokens are taken from the expanded token stream. + SelectionTree::Selection + test(llvm::ArrayRef ExpandedTokens) const { + if (SpelledTokens.empty()) + return NoTokens; + SelectionTree::Selection Result = NoTokens; + while (!ExpandedTokens.empty()) { + // Take consecutive tokens from the same context together for efficiency. 
+ FileID FID = SM.getFileID(ExpandedTokens.front().location()); + auto Batch = ExpandedTokens.take_while([&](const syntax::Token &T) { + return SM.getFileID(T.location()) == FID; + }); + assert(!Batch.empty()); + ExpandedTokens = ExpandedTokens.drop_front(Batch.size()); + + update(Result, testChunk(FID, Batch)); + } + return Result; + } - // Fast-path for missing the selection entirely. - if (Begin >= SelEnd || End <= SelBegin) - return; - - // We will consider the range (at least partially) selected if it hit any - // selected and previously unclaimed token. - bool ClaimedAnyToken = false; - // The selection is (at most) partial if: - // - any claimed token is partially selected - // - any token in the range is unselected - bool PartialSelection = false; - - // Find the first token that (maybe) overlaps the claimed range. - auto Start = llvm::partition_point(Tokens, [&](const TokInfo &Tok) { - return Tok.EndOffset <= Begin; - }); - // Iterate over every token that overlaps the range. - // Claim selected tokens, and update the two result flags. - for (auto It = Start; It != Tokens.end() && It->StartOffset < End; ++It) { - if (It->Selected) { - if (!It->Claimed) { - // Token is selected, in the node's range, and unclaimed; claim it. - It->Claimed = true; - ClaimedAnyToken = true; - // If the token was only partially selected, so is the node. - PartialSelection |= (It->Selected == SelectionTree::Partial); - } - } else { - // If the node covers an unselected token, it's not completely selected. - PartialSelection = true; + // Cheap check whether any of the tokens in R might be selected. + // If it returns false, test() will return NoTokens or Unselected. + // If it returns true, test() may return any value. 
+ bool mayHit(SourceRange R) const { + if (SpelledTokens.empty()) + return false; + auto B = SM.getDecomposedLoc(R.getBegin()); + auto E = SM.getDecomposedLoc(R.getEnd()); + if (B.first == SelFile && E.first == SelFile) + if (E.second < SpelledTokens.front().Offset || + B.second > SpelledTokens.back().Offset) + return false; + return true; + } + +private: + // Hit-test a consecutive range of tokens from a single file ID. + SelectionTree::Selection + testChunk(FileID FID, llvm::ArrayRef Batch) const { + assert(!Batch.empty()); + SourceLocation StartLoc = Batch.front().location(); + // There are several possible categories of FileID depending on how the + // preprocessor was used to generate these tokens: + // main file, #included file, macro args, macro bodies. + // We need to identify the main-file tokens that represent Batch, and + // determine whether we want to exclusively claim them. Regular tokens + // represent one AST construct, but a macro invocation can represent many. + + // Handle tokens written directly in the main file. + if (FID == SelFile) { + return testTokenRange(SM.getFileOffset(Batch.front().location()), + SM.getFileOffset(Batch.back().location())); + } + + // Handle tokens in another file #included into the main file. + // Check if the #include is selected, but don't claim it exclusively. + if (StartLoc.isFileID()) { + for (SourceLocation Loc = Batch.front().location(); Loc.isValid(); + Loc = SM.getIncludeLoc(SM.getFileID(Loc))) { + if (SM.getFileID(Loc) == SelFile) + // FIXME: use whole #include directive, not just the filename string. + return testToken(SM.getFileOffset(Loc)); } + return NoTokens; } - // If some tokens were previously claimed (Result != Unselected), we may - // upgrade from Partial->Complete, even if no new tokens were claimed. - // Important for [[int a]]. - if (ClaimedAnyToken || Result) { - Result = std::max(Result, PartialSelection ? 
SelectionTree::Partial - : SelectionTree::Complete); + assert(StartLoc.isMacroID()); + // Handle tokens that were passed as a macro argument. + SourceLocation ArgStart = SM.getTopMacroCallerLoc(StartLoc); + if (SM.getFileID(ArgStart) == SelFile) { + SourceLocation ArgEnd = SM.getTopMacroCallerLoc(Batch.back().location()); + return testTokenRange(SM.getFileOffset(ArgStart), + SM.getFileOffset(ArgEnd)); } + + // Handle tokens produced by non-argument macro expansion. + // Check if the macro name is selected, don't claim it exclusively. + auto Expansion = SM.getDecomposedExpansionLoc(StartLoc); + if (Expansion.first == SelFile) + // FIXME: also check ( and ) for function-like macros? + return testToken(Expansion.second); + else + return NoTokens; } -private: - struct TokInfo { - unsigned StartOffset; - unsigned EndOffset; + // Is the closed token range [Begin, End] selected? + SelectionTree::Selection testTokenRange(unsigned Begin, unsigned End) const { + assert(Begin <= End); + // Outside the selection entirely? + if (End < SpelledTokens.front().Offset || + Begin > SpelledTokens.back().Offset) + return SelectionTree::Unselected; + + // Compute range of tokens. + auto B = llvm::partition_point( + SpelledTokens, [&](const Tok &T) { return T.Offset < Begin; }); + auto E = std::partition_point( + B, SpelledTokens.end(), [&](const Tok &T) { return T.Offset <= End; }); + + // Aggregate selectedness of tokens in range. + bool ExtendsOutsideSelection = Begin < SpelledTokens.front().Offset || + End > SpelledTokens.back().Offset; + SelectionTree::Selection Result = + ExtendsOutsideSelection ? SelectionTree::Unselected : NoTokens; + for (auto It = B; It != E; ++It) + update(Result, It->Selected); + return Result; + } + + // Is the token at `Offset` selected? + SelectionTree::Selection testToken(unsigned Offset) const { + // Outside the selection entirely? 
+ if (Offset < SpelledTokens.front().Offset || + Offset > SpelledTokens.back().Offset) + return SelectionTree::Unselected; + // Find the token, if it exists. + auto It = llvm::partition_point( + SpelledTokens, [&](const Tok &T) { return T.Offset < Offset; }); + if (It != SpelledTokens.end() && It->Offset == Offset) + return It->Selected; + return NoTokens; + } + + struct Tok { + unsigned Offset; SelectionTree::Selection Selected; - bool Claimed; - bool operator<(const TokInfo &Other) const { - return StartOffset < Other.StartOffset; - } }; - std::vector Tokens; - unsigned SelBegin, SelEnd; + std::vector SpelledTokens; + FileID SelFile; + const SourceManager &SM; }; // Show the type of a node for debugging. @@ -195,16 +383,6 @@ class SelectionVisitor : public RecursiveASTVisitor { V.TraverseAST(AST); assert(V.Stack.size() == 1 && "Unpaired push/pop?"); assert(V.Stack.top() == &V.Nodes.front()); - // We selected TUDecl if tokens were unclaimed (or the file is empty). - SelectionTree::Selection UnclaimedTokens = SelectionTree::Unselected; - V.Claimed.claim(Begin, End, UnclaimedTokens); - if (UnclaimedTokens || V.Nodes.size() == 1) { - StringRef FileContent = AST.getSourceManager().getBufferData(File); - // Don't require the trailing newlines to be selected. - bool SelectedAll = Begin == 0 && End >= FileContent.rtrim().size(); - V.Stack.top()->Selected = - SelectedAll ? 
SelectionTree::Complete : SelectionTree::Partial; - } return std::move(V.Nodes); } @@ -289,11 +467,8 @@ class SelectionVisitor : public RecursiveASTVisitor { #ifndef NDEBUG PrintPolicy(PP), #endif - Claimed(Tokens.spelledTokens(SelFile), SM, SelBegin, SelEnd), - SelFile(SelFile), - SelBeginTokenStart(SM.getFileOffset(Lexer::GetBeginningOfToken( - SM.getComposedLoc(SelFile, SelBegin), SM, LangOpts))), - SelEnd(SelEnd) { + TokenBuf(Tokens), SelChecker(Tokens, SelFile, SelBegin, SelEnd, SM), + UnclaimedExpandedTokens(Tokens.expandedTokens()) { // Ensure we have a node for the TU decl, regardless of traversal scope. Nodes.emplace_back(); Nodes.back().ASTNode = DynTypedNode::create(*AST.getTranslationUnitDecl()); @@ -346,18 +521,12 @@ class SelectionVisitor : public RecursiveASTVisitor { // don't intersect the selection may be recursively skipped. bool canSafelySkipNode(const DynTypedNode &N) { SourceRange S = N.getSourceRange(); - auto B = SM.getDecomposedLoc(S.getBegin()); - auto E = SM.getDecomposedLoc(S.getEnd()); - // Node lies in a macro expansion? - if (B.first != SelFile || E.first != SelFile) - return false; - // Node intersects selection tokens? - if (B.second < SelEnd && E.second >= SelBeginTokenStart) - return false; - // Otherwise, allow skipping over the node. 
- dlog("{1}skip: {0}", printNodeToString(N, PrintPolicy), indent()); - dlog("{1}skipped range = {0}", S.printToString(SM), indent(1)); - return true; + if (!SelChecker.mayHit(S)) { + dlog("{1}skip: {0}", printNodeToString(N, PrintPolicy), indent()); + dlog("{1}skipped range = {0}", S.printToString(SM), indent(1)); + return true; + } + return false; } // There are certain nodes we want to treat as leaves in the SelectionTree, @@ -377,11 +546,9 @@ class SelectionVisitor : public RecursiveASTVisitor { Nodes.emplace_back(); Nodes.back().ASTNode = std::move(Node); Nodes.back().Parent = Stack.top(); + Nodes.back().Selected = NoTokens; Stack.push(&Nodes.back()); claimRange(Early, Nodes.back().Selected); - // Early hit detection never selects the whole node. - if (Nodes.back().Selected) - Nodes.back().Selected = SelectionTree::Partial; } // Pops a node off the ancestor stack, and finalizes it. Pairs with push(). @@ -390,6 +557,8 @@ class SelectionVisitor : public RecursiveASTVisitor { Node &N = *Stack.top(); dlog("{1}pop: {0}", printNodeToString(N.ASTNode, PrintPolicy), indent(-1)); claimRange(N.ASTNode.getSourceRange(), N.Selected); + if (N.Selected == NoTokens) + N.Selected = SelectionTree::Unselected; if (N.Selected || !N.Children.empty()) { // Attach to the tree. N.Parent->Children.push_back(&N); @@ -424,31 +593,12 @@ class SelectionVisitor : public RecursiveASTVisitor { // This is usually called from pop(), so we can take children into account. // The existing state of Result is relevant (early/late claims can interact). void claimRange(SourceRange S, SelectionTree::Selection &Result) { - if (!S.isValid()) - return; - // toHalfOpenFileRange() allows selection of constructs in macro args. e.g: - // #define LOOP_FOREVER(Body) for(;;) { Body } - // void IncrementLots(int &x) { - // LOOP_FOREVER( ++x; ) - // } - // Selecting "++x" or "x" will do the right thing. 
- auto Range = toHalfOpenFileRange(SM, LangOpts, S); - assert(Range && "We should be able to get the File Range"); - dlog("{1}claimRange: {0}", Range->printToString(SM), indent()); - auto B = SM.getDecomposedLoc(Range->getBegin()); - auto E = SM.getDecomposedLoc(Range->getEnd()); - // Otherwise, nodes in macro expansions can't be selected. - if (B.first != SelFile || E.first != SelFile) - return; - // Attempt to claim the remaining range. If there's nothing to claim, only - // children were selected. - Claimed.claim(B.second, E.second, Result); - if (Result) - dlog("{1}hit selection: {0}", - SourceRange(SM.getComposedLoc(B.first, B.second), - SM.getComposedLoc(E.first, E.second)) - .printToString(SM), - indent()); + for (const auto &ClaimedRange : + UnclaimedExpandedTokens.erase(TokenBuf.expandedTokens(S))) + update(Result, SelChecker.test(ClaimedRange)); + + if (Result && Result != NoTokens) + dlog("{1}hit selection: {0}", S.printToString(SM), indent()); } std::string indent(int Offset = 0) { @@ -463,17 +613,11 @@ class SelectionVisitor : public RecursiveASTVisitor { #ifndef NDEBUG const PrintingPolicy &PrintPolicy; #endif + const syntax::TokenBuffer &TokenBuf; std::stack Stack; - SelectedTokens Claimed; + SelectionTester SelChecker; + IntervalSet UnclaimedExpandedTokens; std::deque Nodes; // Stable pointers as we add more nodes. - FileID SelFile; - // If the selection start slices a token in half, the beginning of that token. - // This is useful for checking whether the end of a token range overlaps - // the selection: range.end < SelBeginTokenStart is equivalent to - // range.end + measureToken(range.end) < SelBegin (assuming range.end points - // to a token), and it saves a lex every time. 
- unsigned SelBeginTokenStart; - unsigned SelEnd; }; } // namespace diff --git a/clang-tools-extra/clangd/Selection.h b/clang-tools-extra/clangd/Selection.h index 9bcb9d5fb01f0..a7050c49be6ba 100644 --- a/clang-tools-extra/clangd/Selection.h +++ b/clang-tools-extra/clangd/Selection.h @@ -76,7 +76,7 @@ class SelectionTree { unsigned Start, unsigned End); // Describes to what extent an AST node is covered by the selection. - enum Selection { + enum Selection : unsigned char { // The AST node owns no characters covered by the selection. // Note that characters owned by children don't count: // if (x == 0) scream(); diff --git a/clang-tools-extra/clangd/unittests/SelectionTests.cpp b/clang-tools-extra/clangd/unittests/SelectionTests.cpp index 6f4ccd88b978e..ec9fd4185d943 100644 --- a/clang-tools-extra/clangd/unittests/SelectionTests.cpp +++ b/clang-tools-extra/clangd/unittests/SelectionTests.cpp @@ -134,6 +134,15 @@ TEST(SelectionTest, CommonAncestor) { )cpp", "IfStmt", }, + { + R"cpp( + int x(int); + #define M(foo) x(foo) + int a = 42; + int b = M([[^a]]); + )cpp", + "DeclRefExpr", + }, { R"cpp( void foo(); @@ -378,6 +387,7 @@ TEST(SelectionTest, Selected) { $C[[return]]; }]] else [[{^ }]]]] + char z; } )cpp", R"cpp( @@ -386,10 +396,10 @@ TEST(SelectionTest, Selected) { void foo(^$C[[unique_ptr<$C[[unique_ptr<$C[[int]]>]]>]]^ a) {} )cpp", R"cpp(int a = [[5 >^> 1]];)cpp", - R"cpp([[ + R"cpp( #define ECHO(X) X - ECHO(EC^HO([[$C[[int]]) EC^HO(a]])); - ]])cpp", + ECHO(EC^HO($C[[int]]) EC^HO(a)); + )cpp", R"cpp( $C[[^$C[[int]] a^]]; )cpp", R"cpp( $C[[^$C[[int]] a = $C[[5]]^]]; )cpp", }; @@ -428,6 +438,56 @@ TEST(SelectionTest, PathologicalPreprocessor) { EXPECT_EQ("WhileStmt", T.commonAncestor()->Parent->kind()); } +TEST(SelectionTest, IncludedFile) { + const char *Case = R"cpp( + void test() { +#include "Exp^and.inc" + break; + } + )cpp"; + Annotations Test(Case); + auto TU = TestTU::withCode(Test.code()); + TU.AdditionalFiles["Expand.inc"] = "while(1)\n"; + auto AST = 
TU.build(); + auto T = makeSelectionTree(Case, AST); + + EXPECT_EQ("WhileStmt", T.commonAncestor()->kind()); +} + +TEST(SelectionTest, MacroArgExpansion) { + // If a macro arg is expanded several times, we consider them all selected. + const char *Case = R"cpp( + int mul(int, int); + #define SQUARE(X) mul(X, X); + int nine = SQUARE(^3); + )cpp"; + Annotations Test(Case); + auto AST = TestTU::withCode(Test.code()).build(); + auto T = makeSelectionTree(Case, AST); + // Unfortunately, this makes the common ancestor the CallExpr... + // FIXME: hack around this by picking one? + EXPECT_EQ("CallExpr", T.commonAncestor()->kind()); + EXPECT_FALSE(T.commonAncestor()->Selected); + EXPECT_EQ(2u, T.commonAncestor()->Children.size()); + for (const auto* N : T.commonAncestor()->Children) { + EXPECT_EQ("IntegerLiteral", N->kind()); + EXPECT_TRUE(N->Selected); + } + + // Verify that the common assert() macro doesn't suffer from this. + // (This is because we don't associate the stringified token with the arg). + Case = R"cpp( + void die(const char*); + #define assert(x) (x ? 
(void)0 : die(#x) + void foo() { assert(^42); } + )cpp"; + Test = Annotations(Case); + AST = TestTU::withCode(Test.code()).build(); + T = makeSelectionTree(Case, AST); + + EXPECT_EQ("IntegerLiteral", T.commonAncestor()->kind()); +} + TEST(SelectionTest, Implicit) { const char* Test = R"cpp( struct S { S(const char*); }; diff --git a/clang-tools-extra/clangd/unittests/TweakTests.cpp b/clang-tools-extra/clangd/unittests/TweakTests.cpp index 4e481241acd8c..dc76999040195 100644 --- a/clang-tools-extra/clangd/unittests/TweakTests.cpp +++ b/clang-tools-extra/clangd/unittests/TweakTests.cpp @@ -269,7 +269,7 @@ TEST_F(ExtractVariableTest, Test) { EXPECT_UNAVAILABLE(UnavailableCases); // vector of pairs of input and output strings - const std::vector> + const std::vector> InputOutputs = { // extraction from variable declaration/assignment {R"cpp(void varDecl() { @@ -321,17 +321,10 @@ TEST_F(ExtractVariableTest, Test) { if(1) LOOP(5 + [[3]]) })cpp", - /*FIXME: It should be extracted like this. SelectionTree needs to be - * fixed for macros. 
R"cpp(#define LOOP(x) while (1) {a = x;} - void f(int a) { - auto dummy = 3; if(1) - LOOP(5 + dummy) - })cpp"},*/ - R"cpp(#define LOOP(x) while (1) {a = x;} void f(int a) { - auto dummy = LOOP(5 + 3); if(1) - dummy + auto dummy = 3; if(1) + LOOP(5 + dummy) })cpp"}, {R"cpp(#define LOOP(x) do {x;} while(1); void f(int a) { @@ -644,13 +637,18 @@ void f(const int c) { )cpp"; EXPECT_EQ(apply(TemplateFailInput), "unavailable"); - // FIXME: This should be extractable after selectionTree works correctly for - // macros (currently it doesn't select anything for the following case) - std::string MacroFailInput = R"cpp( + std::string MacroInput = R"cpp( #define F(BODY) void f() { BODY } F ([[int x = 0;]]) )cpp"; - EXPECT_EQ(apply(MacroFailInput), "unavailable"); + std::string MacroOutput = R"cpp( + #define F(BODY) void f() { BODY } + void extracted() { +int x = 0; +} +F (extracted();) + )cpp"; + EXPECT_EQ(apply(MacroInput), MacroOutput); // Shouldn't crash. EXPECT_EQ(apply("void f([[int a]]);"), "unavailable"); diff --git a/clang/include/clang/Tooling/Syntax/Tokens.h b/clang/include/clang/Tooling/Syntax/Tokens.h index 301432d3888b3..6f4d0e0c050af 100644 --- a/clang/include/clang/Tooling/Syntax/Tokens.h +++ b/clang/include/clang/Tooling/Syntax/Tokens.h @@ -175,6 +175,7 @@ class TokenBuffer { /// All tokens produced by the preprocessor after all macro replacements, /// directives, etc. Source locations found in the clang AST will always /// point to one of these tokens. + /// Tokens are in TU order (per SourceManager::isBeforeInTranslationUnit()). /// FIXME: figure out how to handle token splitting, e.g. '>>' can be split /// into two '>' tokens by the parser. However, TokenBuffer currently /// keeps it as a single '>>' token. @@ -182,6 +183,10 @@ class TokenBuffer { return ExpandedTokens; } + /// Returns the subrange of expandedTokens() corresponding to the closed + /// token range R. 
+ llvm::ArrayRef expandedTokens(SourceRange R) const; + /// Find the subrange of spelled tokens that produced the corresponding \p /// Expanded tokens. /// diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp index a2c3bc137d6ba..5941507e086d2 100644 --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -119,6 +119,22 @@ llvm::StringRef FileRange::text(const SourceManager &SM) const { return Text.substr(Begin, length()); } +llvm::ArrayRef TokenBuffer::expandedTokens(SourceRange R) const { + if (R.isInvalid()) + return {}; + const Token *Begin = + llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) { + return SourceMgr->isBeforeInTranslationUnit(T.location(), R.getBegin()); + }); + const Token *End = + llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) { + return !SourceMgr->isBeforeInTranslationUnit(R.getEnd(), T.location()); + }); + if (Begin > End) + return {}; + return {Begin, End}; +} + std::pair TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const { assert(Expanded); diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp index 6ffe2c43dd0ff..2c462d49ee410 100644 --- a/clang/unittests/Tooling/Syntax/TokensTest.cpp +++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp @@ -40,6 +40,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Testing/Support/Annotations.h" #include "llvm/Testing/Support/SupportHelpers.h" +#include "gmock/gmock.h" #include #include #include @@ -663,6 +664,20 @@ TEST_F(TokenBufferTest, SpelledByExpanded) { ValueIs(SameRange(findSpelled("not_mapped")))); } +TEST_F(TokenBufferTest, ExpandedTokensForRange) { + recordTokens(R"cpp( + #define SIGN(X) X##_washere + A SIGN(B) C SIGN(D) E SIGN(F) G + )cpp"); + + SourceRange R(findExpanded("C").front().location(), + findExpanded("F_washere").front().location()); + // Sanity check: expanded and spelled tokens are stored 
separately. + EXPECT_THAT(Buffer.expandedTokens(R), + SameRange(findExpanded("C D_washere E F_washere"))); + EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty()); +} + TEST_F(TokenBufferTest, ExpansionStartingAt) { // Object-like macro expansions. recordTokens(R"cpp( From 471d06020a6a12c621131c508e58878890db3906 Mon Sep 17 00:00:00 2001 From: Alexandre Ganea Date: Fri, 29 Nov 2019 10:52:13 -0500 Subject: [PATCH 229/591] [CIndex] Fix annotate-deep-statements test when using a Debug build Differential Revision: https://reviews.llvm.org/D70149 --- clang/lib/Sema/SemaChecking.cpp | 3 ++- clang/tools/libclang/CIndex.cpp | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index adefca7fe4e78..ed42833531d42 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -12950,7 +12950,8 @@ class SequenceChecker : public EvaluatedExprVisitor { // expression or statement in the body of the function [and thus before // the value computation of its result]. SequencedSubexpression Sequenced(*this); - Base::VisitCallExpr(CE); + SemaRef.runWithSufficientStackSpace(CE->getExprLoc(), + [&] { Base::VisitCallExpr(CE); }); // FIXME: CXXNewExpr and CXXDeleteExpr implicitly call functions. 
} diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 2078e47195226..a8222356db44a 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -3595,6 +3595,7 @@ enum CXErrorCode clang_parseTranslationUnit2( const char *const *command_line_args, int num_command_line_args, struct CXUnsavedFile *unsaved_files, unsigned num_unsaved_files, unsigned options, CXTranslationUnit *out_TU) { + noteBottomOfStack(); SmallVector Args; Args.push_back("clang"); Args.append(command_line_args, command_line_args + num_command_line_args); @@ -3619,6 +3620,7 @@ enum CXErrorCode clang_parseTranslationUnit2FullArgv( CXErrorCode result = CXError_Failure; auto ParseTranslationUnitImpl = [=, &result] { + noteBottomOfStack(); result = clang_parseTranslationUnit_Impl( CIdx, source_filename, command_line_args, num_command_line_args, llvm::makeArrayRef(unsaved_files, num_unsaved_files), options, out_TU); @@ -6622,9 +6624,10 @@ void clang_enableStackTraces(void) { void clang_executeOnThread(void (*fn)(void*), void *user_data, unsigned stack_size) { - llvm::llvm_execute_on_thread( - fn, user_data, - stack_size == 0 ? llvm::None : llvm::Optional(stack_size)); + llvm::llvm_execute_on_thread(fn, user_data, + stack_size == 0 + ? 
clang::DesiredStackSize
                                   : llvm::Optional(stack_size));
 }
 
 //===----------------------------------------------------------------------===//
From 3b0b7536a3182c1c69ade2bee80cdf954813a059 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea 
Date: Fri, 29 Nov 2019 11:28:49 -0500
Subject: [PATCH 230/591] On Windows, fix fuse-ld.c test when lld is provided
 explicitly in -DCMAKE_LINKER

---
 clang/test/Driver/fuse-ld.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/Driver/fuse-ld.c b/clang/test/Driver/fuse-ld.c
index 4b2ec7b1bb2ae..13e709ccfdfa4 100644
--- a/clang/test/Driver/fuse-ld.c
+++ b/clang/test/Driver/fuse-ld.c
@@ -79,13 +79,13 @@
 // RUN: %clang %s -### -fuse-ld=lld \
 // RUN:     -target i686-unknown-windows-msvc 2>&1 \
 // RUN:   | FileCheck %s --check-prefix CHECK-WINDOWS-MSVC-LLD
-// CHECK-WINDOWS-MSVC-LLD: "{{.*}}lld-link"
+// CHECK-WINDOWS-MSVC-LLD: "{{.*}}lld-link{{\.exe"|"}}
 // CHECK-WINDOWS-MSVC-LLD-SAME: "-out:{{.*}}"
 
 // RUN: %clang %s -### -fuse-ld=lld-link \
 // RUN:     -target i686-unknown-windows-msvc 2>&1 \
 // RUN:   | FileCheck %s --check-prefix CHECK-WINDOWS-MSVC-LLD-LINK
-// CHECK-WINDOWS-MSVC-LLD-LINK: "{{.*}}lld-link"
+// CHECK-WINDOWS-MSVC-LLD-LINK: "{{.*}}lld-link{{\.exe"|"}}
 // CHECK-WINDOWS-MSVC-LLD-LINK-SAME: "-out:{{.*}}"
 
 // RUN: %clang %s -### -fuse-ld=bfd \
From c313a6bdbe365644a84069162a5f4d73dec2131c Mon Sep 17 00:00:00 2001
From: Carey Williams 
Date: Fri, 29 Nov 2019 17:00:55 +0000
Subject: [PATCH 231/591] Revert "[NFC] Fix test reserve_global_reg.ll after
 2d739f9"

This reverts commit aea7578fade2563cb5ea60548914667b515c457a.
--- llvm/test/Feature/reserve_global_reg.ll | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/test/Feature/reserve_global_reg.ll b/llvm/test/Feature/reserve_global_reg.ll index 405f3eea00a5e..06081cae1fb2f 100644 --- a/llvm/test/Feature/reserve_global_reg.ll +++ b/llvm/test/Feature/reserve_global_reg.ll @@ -1,4 +1,3 @@ -; REQUIRES: arm ; RUN: not llc < %s -mtriple=thumbv7-apple-darwin -mattr=+reserve-r7 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP7 %s ; RUN: not llc < %s -mtriple=armv7-windows-msvc -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11 %s ; RUN: not llc < %s -mtriple=thumbv7-windows -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11-2 %s From 76fd58d0fe69667304569cb1c4cffe041c3cb9c5 Mon Sep 17 00:00:00 2001 From: Carey Williams Date: Fri, 29 Nov 2019 17:01:05 +0000 Subject: [PATCH 232/591] Revert "[ARM] Allocatable Global Register Variables for ARM" This reverts commit 2d739f98d8a53e38bf9faa88cdb6b0c2a363fb77. --- clang/docs/ClangCommandLineReference.rst | 21 ------- .../clang/Basic/DiagnosticDriverKinds.td | 4 -- clang/include/clang/Basic/DiagnosticGroups.td | 3 - .../clang/Basic/DiagnosticSemaKinds.td | 2 - clang/include/clang/Basic/TargetInfo.h | 6 -- clang/include/clang/Driver/Options.td | 5 +- clang/lib/Basic/Targets/ARM.cpp | 32 ---------- clang/lib/Basic/Targets/ARM.h | 3 - clang/lib/Driver/ToolChains/Arch/ARM.cpp | 38 ++--------- clang/lib/Sema/SemaDecl.cpp | 2 - clang/test/Driver/arm-reserved-reg-options.c | 35 ----------- clang/test/Sema/arm-global-regs.c | 20 ------ llvm/lib/Target/ARM/ARM.td | 8 +-- llvm/lib/Target/ARM/ARMAsmPrinter.cpp | 2 +- llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp | 19 ++---- llvm/lib/Target/ARM/ARMFrameLowering.cpp | 17 +---- llvm/lib/Target/ARM/ARMISelLowering.cpp | 12 +--- llvm/lib/Target/ARM/ARMSubtarget.cpp | 19 ++---- llvm/lib/Target/ARM/ARMSubtarget.h | 9 ++- llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 4 +- .../CodeGen/ARM/reg-alloc-fixed-r6-vla.ll 
| 44 ------------- .../reg-alloc-with-fixed-reg-r6-modified.ll | 63 ------------------- .../ARM/reg-alloc-with-fixed-reg-r6.ll | 57 ----------------- .../CodeGen/ARM/reg-alloc-wout-fixed-regs.ll | 58 ----------------- .../CodeGen/Thumb/callee_save_reserved.ll | 15 ----- llvm/test/Feature/reserve_global_reg.ll | 29 --------- 26 files changed, 29 insertions(+), 498 deletions(-) delete mode 100644 clang/test/Driver/arm-reserved-reg-options.c delete mode 100644 clang/test/Sema/arm-global-regs.c delete mode 100644 llvm/test/CodeGen/ARM/reg-alloc-fixed-r6-vla.ll delete mode 100644 llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6-modified.ll delete mode 100644 llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6.ll delete mode 100644 llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll delete mode 100644 llvm/test/CodeGen/Thumb/callee_save_reserved.ll delete mode 100644 llvm/test/Feature/reserve_global_reg.ll diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst index 492eec71f2e4e..e8d561fae9564 100644 --- a/clang/docs/ClangCommandLineReference.rst +++ b/clang/docs/ClangCommandLineReference.rst @@ -2430,31 +2430,10 @@ Enable XNACK (AMDGPU only) ARM --- - -.. option:: -ffixed-r6 - -Reserve the r6 register (ARM only) - -.. option:: -ffixed-r7 - -Reserve the r7 register (ARM only) - -.. option:: -ffixed-r8 - -Reserve the r8 register (ARM only) - .. option:: -ffixed-r9 Reserve the r9 register (ARM only) -.. option:: -ffixed-r10 - -Reserve the r10 register (ARM only) - -.. option:: -ffixed-r11 - -Reserve the r11 register (ARM only) - .. 
option:: -mexecute-only, -mno-execute-only, -mpure-code Disallow generation of data access to code sections (ARM only) diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index b4904bb9d2dc0..39242c972ea28 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -464,10 +464,6 @@ def warn_drv_msp430_hwmult_no_device : Warning<"no MCU device specified, but " "specify a MSP430 device, or -mhwmult to set hardware multiply type " "explicitly.">, InGroup; -// Frame pointer reservation. -def err_reserved_frame_pointer : Error< - "'%0' has been specified but '%1' is used as the frame pointer for this target">; - def warn_drv_libstdcxx_not_found : Warning< "include path for libstdc++ headers not found; pass '-stdlib=libc++' on the " "command line to use the libc++ standard library instead">, diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 5bfb3de86a477..35e939fda95c4 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1120,6 +1120,3 @@ def CrossTU : DiagGroup<"ctu">; def CTADMaybeUnsupported : DiagGroup<"ctad-maybe-unsupported">; def FortifySource : DiagGroup<"fortify-source">; - -// Register reservation. 
-def FixedRegs : DiagGroup<"fixed-registers">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index c30f65d94581e..73e329fcf2fa0 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7743,8 +7743,6 @@ let CategoryName = "Inline Assembly Issue" in { def err_asm_unknown_register_name : Error<"unknown register name '%0' in asm">; def err_asm_invalid_global_var_reg : Error<"register '%0' unsuitable for " "global register variables on this target">; - def err_asm_missing_fixed_reg_opt : Error<"-ffixed-%0 is required for " - "global named register variable declaration">; def err_asm_register_size_mismatch : Error<"size of register '%0' does not " "match variable size">; def err_asm_bad_register_type : Error<"bad type for named register variable">; diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index cc83f4c34c145..33cecdadc686c 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -938,12 +938,6 @@ class TargetInfo : public virtual TransferrableTargetInfo, return true; } - /// Check if the register is reserved globally - /// - /// This function returns true if the register passed in RegName is reserved - /// using the corresponding -ffixed-RegName option. - virtual bool isRegisterReservedGlobally(StringRef) const { return true; } - // validateOutputConstraint, validateInputConstraint - Checks that // a constraint is valid and provides information about it. // FIXME: These should return a real error instead of just true/false. 
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index daba98a39dab5..a64d0acb06fe1 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2267,9 +2267,8 @@ def mrestrict_it: Flag<["-"], "mrestrict-it">, Group, def mno_restrict_it: Flag<["-"], "mno-restrict-it">, Group, HelpText<"Allow generation of deprecated IT blocks for ARMv8. It is off by default for ARMv8 Thumb mode">; def marm : Flag<["-"], "marm">, Alias; -foreach i = {6-11} in - def ffixed_r#i : Flag<["-"], "ffixed-r"#i>, Group, - HelpText<"Reserve the r"#i#" register (ARM only)">; +def ffixed_r9 : Flag<["-"], "ffixed-r9">, Group, + HelpText<"Reserve the r9 register (ARM only)">; def mno_movt : Flag<["-"], "mno-movt">, Group, HelpText<"Disallow use of movt/movw pairs (ARM only)">; def mcrc : Flag<["-"], "mcrc">, Group, diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp index 92e5e26eba3c2..437a77afdc998 100644 --- a/clang/lib/Basic/Targets/ARM.cpp +++ b/clang/lib/Basic/Targets/ARM.cpp @@ -879,38 +879,6 @@ ArrayRef ARMTargetInfo::getGCCRegAliases() const { return llvm::makeArrayRef(GCCRegAliases); } -bool ARMTargetInfo::validateGlobalRegisterVariable( - StringRef RegName, unsigned RegSize, bool &HasSizeMismatch) const { - bool isValid = llvm::StringSwitch(RegName) - .Case("r6", true) - .Case("r7", true) - .Case("r8", true) - .Case("r9", true) - .Case("r10", true) - .Case("r11", true) - .Case("sp", true) - .Default(false); - HasSizeMismatch = false; - return isValid; -} - -bool ARMTargetInfo::isRegisterReservedGlobally(StringRef RegName) const { - // The "sp" register does not have a -ffixed-sp option, - // so reserve it unconditionally. 
- if (RegName.equals("sp")) - return true; - - // reserve rN (N:6-11) registers only if the corresponding - // +reserve-rN feature is found - const std::vector &Features = getTargetOpts().Features; - const std::string SearchFeature = "+reserve-" + RegName.str(); - for (const std::string &Feature : Features) { - if (Feature.compare(SearchFeature) == 0) - return true; - } - return false; -} - bool ARMTargetInfo::validateAsmConstraint( const char *&Name, TargetInfo::ConstraintInfo &Info) const { switch (*Name) { diff --git a/clang/lib/Basic/Targets/ARM.h b/clang/lib/Basic/Targets/ARM.h index 90fb20f8f7a5f..ce87a6265934b 100644 --- a/clang/lib/Basic/Targets/ARM.h +++ b/clang/lib/Basic/Targets/ARM.h @@ -161,9 +161,6 @@ class LLVM_LIBRARY_VISIBILITY ARMTargetInfo : public TargetInfo { ArrayRef getGCCRegNames() const override; ArrayRef getGCCRegAliases() const override; - bool validateGlobalRegisterVariable(StringRef RegName, unsigned RegSize, - bool &HasSizeMismatch) const override; - bool isRegisterReservedGlobally(StringRef RegName) const override; bool validateAsmConstraint(const char *&Name, TargetInfo::ConstraintInfo &Info) const override; std::string convertConstraint(const char *&Constraint) const override; diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp index cca47722c2044..68a57310ad402 100644 --- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp +++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp @@ -592,39 +592,11 @@ void arm::getARMTargetFeatures(const ToolChain &TC, Features.push_back("+strict-align"); } - // Do not allow r9 reservation with -frwpi. 
- if (Args.hasArg(options::OPT_ffixed_r9) && Args.hasArg(options::OPT_frwpi)) { - Arg *A = Args.getLastArg(options::OPT_ffixed_r9); - Arg *B = Args.getLastArg(options::OPT_frwpi); - D.Diag(diag::err_opt_not_valid_with_opt) - << A->getAsString(Args) << B->getAsString(Args); - } - - // The compiler can still use a FP in certain circumstances, - // even when frame pointer elimination is enabled. Thus we should - // not allow to reserve a target's FP register. - const llvm::opt::OptSpecifier RestrictFPOpt = - (Triple.isOSDarwin() || (!Triple.isOSWindows() && Triple.isThumb())) - ? options::OPT_ffixed_r7 - : options::OPT_ffixed_r11; - if (Args.hasArg(RestrictFPOpt)) { - const std::string OptStr = - Args.getLastArg(RestrictFPOpt)->getAsString(Args); - const unsigned int SubStrIndex = strlen("ffixed-r"); - D.Diag(diag::err_reserved_frame_pointer) - << OptStr << OptStr.substr(SubStrIndex); - } - -// Reservation of general purpose registers. -#define HANDLE_FFIXED_R(n) \ - if (Args.hasArg(options::OPT_ffixed_r##n)) \ - Features.push_back("+reserve-r" #n) - HANDLE_FFIXED_R(6); - HANDLE_FFIXED_R(7); - HANDLE_FFIXED_R(8); - HANDLE_FFIXED_R(9); - HANDLE_FFIXED_R(10); - HANDLE_FFIXED_R(11); + // llvm does not support reserving registers in general. There is support + // for reserving r9 on ARM though (defined as a platform-specific register + // in ARM EABI). + if (Args.hasArg(options::OPT_ffixed_r9)) + Features.push_back("+reserve-r9"); // The kext linker doesn't know how to deal with movw/movt. 
if (KernelOrKext || Args.hasArg(options::OPT_mno_movt)) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index dffb460cedc9b..d35037273106a 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -7042,8 +7042,6 @@ NamedDecl *Sema::ActOnVariableDeclarator( Diag(E->getExprLoc(), diag::err_asm_invalid_global_var_reg) << Label; else if (HasSizeMismatch) Diag(E->getExprLoc(), diag::err_asm_register_size_mismatch) << Label; - else if (!TI.isRegisterReservedGlobally(Label)) - Diag(E->getExprLoc(), diag::err_asm_missing_fixed_reg_opt) << Label; } if (!R->isIntegralType(Context) && !R->isPointerType()) { diff --git a/clang/test/Driver/arm-reserved-reg-options.c b/clang/test/Driver/arm-reserved-reg-options.c deleted file mode 100644 index e97c717d7e7e7..0000000000000 --- a/clang/test/Driver/arm-reserved-reg-options.c +++ /dev/null @@ -1,35 +0,0 @@ -// ## FP ARM + Thumb -// RUN: %clang -target arm-arm-none-eabi -### -ffixed-r11 -c %s 2>&1 | FileCheck -check-prefix=CHECK-ERROR-R11 %s -// RUN: %clang -target arm-arm-none-eabi -### -ffixed-r7 -c %s 2>&1 | FileCheck -check-prefix=CHECK-NO-ERROR %s - -// RUN: %clang -target arm-arm-none-eabi -### -ffixed-r7 -mthumb -c %s 2>&1 | FileCheck -check-prefix=CHECK-ERROR-R7 %s -// RUN: %clang -target arm-arm-none-eabi -### -ffixed-r11 -mthumb -c %s 2>&1 | FileCheck -check-prefix=CHECK-NO-ERROR %s - -// RUN: %clang -target thumbv6m-none-eabi -### -ffixed-r7 -c %s 2>&1 | FileCheck -check-prefix=CHECK-ERROR-R7 %s -// RUN: %clang -target thumbv6m-none-eabi -### -ffixed-r11 -c %s 2>&1 | FileCheck -check-prefix=CHECK-NO-ERROR %s - -// ## FP Darwin (R7) -// RUN: %clang -target armv6-apple-darwin9 -### -ffixed-r7 -c %s 2>&1 | FileCheck -check-prefix=CHECK-ERROR-R7 %s -// RUN: %clang -target armv6-apple-darwin9 -### -ffixed-r11 -c %s 2>&1 | FileCheck -check-prefix=CHECK-NO-ERROR %s - -// RUN: %clang -target armv6-apple-ios3 -### -ffixed-r7 -c %s 2>&1 | FileCheck -check-prefix=CHECK-ERROR-R7 %s -// RUN: 
%clang -target armv6-apple-ios3 -### -ffixed-r11 -c %s 2>&1 | FileCheck -check-prefix=CHECK-NO-ERROR %s - -// RUN: %clang -target armv7s-apple-darwin10 -### -ffixed-r7 -c %s 2>&1 | FileCheck -check-prefix=CHECK-ERROR-R7 %s -// RUN: %clang -target armv7s-apple-darwin10 -### -ffixed-r11 -c %s 2>&1 | FileCheck -check-prefix=CHECK-NO-ERROR %s - -// ## FP Windows (R11) -// RUN: %clang -target armv7-windows -### -ffixed-r11 -c %s 2>&1 | FileCheck -check-prefix=CHECK-ERROR-R11 %s -// RUN: %clang -target armv7-windows -### -ffixed-r7 -c %s 2>&1 | FileCheck -check-prefix=CHECK-NO-ERROR %s - -// ## FRWPI (R9) -// RUN: %clang -target arm-arm-none-eabi -### -frwpi -ffixed-r9 -c %s 2>&1 | FileCheck -check-prefix=CHECK-RESERVED-FRWPI-CONFLICT %s -// RUN: %clang -target arm-arm-none-eabi -### -ffixed-r9 -c %s 2>&1 | FileCheck -check-prefix=CHECK-RESERVED-FRWPI-VALID %s -// RUN: %clang -target arm-arm-none-eabi -### -frwpi -c %s 2>&1 | FileCheck -check-prefix=CHECK-RESERVED-FRWPI-VALID %s - -// CHECK-ERROR-R11: error: '-ffixed-r11' has been specified but 'r11' is used as the frame pointer for this target -// CHECK-ERROR-R7: error: '-ffixed-r7' has been specified but 'r7' is used as the frame pointer for this target -// CHECK-NO-ERROR-NOT: may still be used as a frame pointer - -// CHECK-RESERVED-FRWPI-CONFLICT: option '-ffixed-r9' cannot be specified with '-frwpi' -// CHECK-RESERVED-FRWPI-VALID-NOT: option '-ffixed-r9' cannot be specified with '-frwpi' diff --git a/clang/test/Sema/arm-global-regs.c b/clang/test/Sema/arm-global-regs.c deleted file mode 100644 index 753cb60e68388..0000000000000 --- a/clang/test/Sema/arm-global-regs.c +++ /dev/null @@ -1,20 +0,0 @@ -// RUN: %clang_cc1 -ffreestanding -fsyntax-only -target-feature +reserve-r9 -verify -triple arm-arm-none-eabi %s - -// Check a small subset of valid and invalid global register variable declarations. -// Also check that for global register variables without -ffixed-reg options it throws an error. 
- -register unsigned arm_r3 __asm("r3"); //expected-error {{register 'r3' unsuitable for global register variables on this target}} - -register unsigned arm_r12 __asm("r12"); //expected-error {{register 'r12' unsuitable for global register variables on this target}} - -register unsigned arm_r5 __asm("r5"); //expected-error {{register 'r5' unsuitable for global register variables on this target}} - -register unsigned arm_r9 __asm("r9"); - -register unsigned arm_r6 __asm("r6"); //expected-error {{-ffixed-r6 is required for global named register variable declaration}} - -register unsigned arm_r7 __asm("r7"); //expected-error {{-ffixed-r7 is required for global named register variable declaration}} - -register unsigned *parm_r7 __asm("r7"); //expected-error {{-ffixed-r7 is required for global named register variable declaration}} - -register unsigned arm_sp __asm("sp"); diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td index 285dad1cf29a3..66bfd4c82e25c 100644 --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -391,11 +391,9 @@ def FeatureExecuteOnly : SubtargetFeature<"execute-only", "Enable the generation of " "execute only code.">; -foreach i = {6-11} in - def FeatureReserveR#i : SubtargetFeature<"reserve-r"#i, - "ReservedGPRegisters["#i#"]", "true", - "Reserve R"#i#", making it " - "unavailable as a GPR">; +def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true", + "Reserve R9, making it unavailable" + " as GPR">; def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true", "Don't use movt/movw pairs for " diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 10153dd2e3950..ed0969fa625b0 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -752,7 +752,7 @@ void ARMAsmPrinter::emitAttributes() { if (STI.isRWPI()) ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, ARMBuildAttrs::R9IsSB); - else if 
(STI.isGPRegisterReserved(9)) + else if (STI.isR9Reserved()) ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, ARMBuildAttrs::R9Reserved); else diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index afcdb648cbc8f..4ace52b32e9ff 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -198,11 +198,9 @@ getReservedRegs(const MachineFunction &MF) const { markSuperRegs(Reserved, getFramePointerReg(STI)); if (hasBasePointer(MF)) markSuperRegs(Reserved, BasePtr); - for (size_t R = 0; R < ARM::GPRRegClass.getNumRegs(); ++R) { - if (STI.isGPRegisterReserved(R)) { - markSuperRegs(Reserved, ARM::R0 + R); - } - } + // Some targets reserve R9. + if (STI.isR9Reserved()) + markSuperRegs(Reserved, ARM::R9); // Reserve D16-D31 if the subtarget doesn't support them. if (!STI.hasD32()) { static_assert(ARM::D31 == ARM::D16 + 15, "Register list not consecutive!"); @@ -282,7 +280,7 @@ ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, case ARM::GPRRegClassID: { bool HasFP = MF.getFrameInfo().isMaxCallFrameSizeComputed() ? TFI->hasFP(MF) : true; - return 10 - HasFP - STI.getNumGPRegistersReserved(); + return 10 - HasFP - (STI.isR9Reserved() ? 1 : 0); } case ARM::SPRRegClassID: // Currently not used as 'rep' register class. case ARM::DPRRegClassID: @@ -382,11 +380,6 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo(); const ARMFrameLowering *TFI = getFrameLowering(MF); - const ARMSubtarget &STI = MF.getSubtarget(); - - // Disable base pointer R6 if -ffixed-r6 is used. - if (STI.isGPRegisterReserved(BasePtr - ARM::R0)) - return false; // If we have stack realignment and VLAs, we have no pointer to use to // access the stack. 
If we have stack realignment, and a large call frame, @@ -423,7 +416,6 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { const MachineRegisterInfo *MRI = &MF.getRegInfo(); const ARMFrameLowering *TFI = getFrameLowering(MF); - const ARMSubtarget &STI = MF.getSubtarget(); // We can't realign the stack if: // 1. Dynamic stack realignment is explicitly disabled, // 2. There are VLAs in the function and the base pointer is disabled. @@ -433,9 +425,6 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { // register allocation with frame pointer elimination, it is too late now. if (!MRI->canReserveReg(getFramePointerReg(MF.getSubtarget()))) return false; - // Disable base pointer R6 if -ffixed-r6 is used. - if (STI.isGPRegisterReserved(BasePtr - ARM::R0)) - return false; // We may also need a base pointer if there are dynamic allocas or stack // pointer adjustments around calls. if (TFI->hasReservedCallFrame(MF)) diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 106894e28f033..5428bd6c94b35 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -1704,19 +1704,6 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; - if (STI.isRWPI() && Reg == ARM::R9) { - // Paranoid check for use of R9 with RWPI. Clobbering R9 with -frwpi will - // emit warnings about undefined behaviour but maybe theres's a valid use - // case so on that basis allow it to be pushed/popped in the - // prologue/epilogue. 
- } else if (Reg > ARM::R0 && ARM::GPRRegClass.contains(Reg) && - STI.isGPRegisterReserved(Reg - ARM::R0)) { - LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << " has been reserved and" - << " should not be allocatable" - << " or spillable.\n"); - SavedRegs.reset(Reg); - continue; - } bool Spilled = false; if (SavedRegs.test(Reg)) { Spilled = true; @@ -1961,7 +1948,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << " is saved low register, RegDeficit = " << RegDeficit << "\n"); - } else if (!STI.isGPRegisterReserved(Reg - ARM::R0)) { + } else { AvailableRegs.push_back(Reg); LLVM_DEBUG( dbgs() @@ -1976,7 +1963,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, --RegDeficit; LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = " << RegDeficit << "\n"); - } else if (!STI.isGPRegisterReserved(7)) { + } else { AvailableRegs.push_back(ARM::R7); LLVM_DEBUG( dbgs() diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 83a06767a57fc..8271c6fad0fbd 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -5579,15 +5579,9 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { Register ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT, const MachineFunction &MF) const { Register Reg = StringSwitch(RegName) - .Case("r6", ARM::R6) - .Case("r7", ARM::R7) - .Case("r8", ARM::R8) - .Case("r9", ARM::R9) - .Case("r10", ARM::R10) - .Case("r11", ARM::R11) - .Case("sp", ARM::SP) - .Default(ARM::NoRegister); - if (Reg != ARM::NoRegister) + .Case("sp", ARM::SP) + .Default(0); + if (Reg) return Reg; report_fatal_error(Twine("Invalid register name \"" + StringRef(RegName) + "\".")); diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp index 7a57376a68953..eb4d39b01cbbf 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ 
b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -98,9 +98,8 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU, const ARMBaseTargetMachine &TM, bool IsLittle, bool MinSize) : ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps), - ReservedGPRegisters(ARM::GPRRegClass.getNumRegs()), CPUString(CPU), - OptMinSize(MinSize), IsLittle(IsLittle), TargetTriple(TT), - Options(TM.Options), TM(TM), + CPUString(CPU), OptMinSize(MinSize), IsLittle(IsLittle), + TargetTriple(TT), Options(TM.Options), TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)), // At this point initializeSubtargetDependencies has been called so // we can query directly. @@ -254,18 +253,8 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { (Options.UnsafeFPMath || isTargetDarwin())) UseNEONForSinglePrecisionFP = true; - if (isRWPI() || (isTargetMachO() && !HasV6Ops)) - ReservedGPRegisters.set(9); - - // Throw an error when trying to reserve a target's FP register. It may - // be used by the compiler even when frame pointer elimination is enabled. - // FIXME: Throw this error if -frame-pointer=none is not set; otherwise - // only emit a warning. - const int restFP = (useR7AsFramePointer()) ? 7 : 11; - if (isGPRegisterReserved(restFP)) - report_fatal_error( - "Register r" + std::to_string(restFP) + - " has been specified but is used as the frame pointer for this target."); + if (isRWPI()) + ReserveR9 = true; // If MVEVectorCostFactor is still 0 (has not been set to anything else), default it to 2 if (MVEVectorCostFactor == 0) diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index c5836a3eca7b7..f582a92f65639 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -229,8 +229,8 @@ class ARMSubtarget : public ARMGenSubtargetInfo { /// NoARM - True if subtarget does not support ARM mode execution. 
bool NoARM = false; - // ReservedGPRegisters[i] - R#i is not available as a general purpose register - BitVector ReservedGPRegisters; + /// ReserveR9 - True if R9 is not available as a general purpose register. + bool ReserveR9 = false; /// NoMovt - True if MOVT / MOVW pairs are not used for materialization of /// 32-bit imms (including global addresses). @@ -763,9 +763,8 @@ class ARMSubtarget : public ARMGenSubtargetInfo { bool isAClass() const { return ARMProcClass == AClass; } bool isReadTPHard() const { return ReadTPHard; } - bool isGPRegisterReserved(size_t i) const { return ReservedGPRegisters[i]; } - unsigned getNumGPRegistersReserved() const { - return ReservedGPRegisters.count(); + bool isR9Reserved() const { + return isTargetMachO() ? (ReserveR9 || !HasV6Ops) : ReserveR9; } bool useR7AsFramePointer() const { diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index c1fd01d2df9d5..5bb3bcaf10e77 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -76,9 +76,7 @@ class ARMTTIImpl : public BasicTTIImplBase { ARM::FeatureDSP, ARM::FeatureMP, ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass, ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign, ARM::FeatureLongCalls, - ARM::FeatureExecuteOnly, ARM::FeatureReserveR6, ARM::FeatureReserveR7, - ARM::FeatureReserveR8, ARM::FeatureReserveR9, ARM::FeatureReserveR10, - ARM::FeatureReserveR11, ARM::FeatureNoMovt, + ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates }; diff --git a/llvm/test/CodeGen/ARM/reg-alloc-fixed-r6-vla.ll b/llvm/test/CodeGen/ARM/reg-alloc-fixed-r6-vla.ll deleted file mode 100644 index 0b6fd7443af29..0000000000000 --- a/llvm/test/CodeGen/ARM/reg-alloc-fixed-r6-vla.ll +++ /dev/null @@ -1,44 +0,0 @@ -; Using VLAs(Variable Length Arrays) in a function will use R6 to keep track -; of the stack frame, and also 
spill/restore R6 to the stack. -; This tests that using -ffixed-r6 (-mattr=+reserve-r6) will stop R6 -; being used and also stop it being spilled/restored to the stack. -; RUN: llc < %s -mcpu=cortex-m0 -mtriple=thumbv7-arm-none-eabi | FileCheck %s --check-prefix=CHECK-STATIC --check-prefix=CHECK-R6 -; RUN: llc < %s -mcpu=cortex-m0 -mtriple=thumbv7-arm-none-eabi -mattr=+reserve-r6 | FileCheck %s --check-prefix=CHECK-STATIC --check-prefix=CHECK-NO-R6 - -define void @f() #0 { -entry: - %i = alloca i32, align 4 - store i32 0, i32* %i, align 4 - - %saved_stack = alloca i8*, align 4 - %0 = call i8* @llvm.stacksave() - store i8* %0, i8** %saved_stack, align 4 - - %__vla_expr0 = alloca i32, align 4 - %1 = load i32, i32* %i, align 4 - %vla = alloca double, i32 %1, align 8 - store i32 %1, i32* %__vla_expr0, align 4 - - %2 = load i8*, i8** %saved_stack, align 4 - call void @llvm.stackrestore(i8* %2) - - ret void -} - -declare i8* @llvm.stacksave() #1 -declare void @llvm.stackrestore(i8* %ptr) #1 - -attributes #0 = { noinline nounwind "stackrealign" } -attributes #1 = { nounwind } - -; CHECK-STATIC: push {r4, -; CHECK-R6: r6 -; CHECK-NO-R6-NOT: r6 -; CHECK-STATIC: lr} -; CHECK-R6: r6 -; CHECK-NO-R6-NOT: r6 -; CHECK-STATIC: pop {r4, -; CHECK-R6: r6 -; CHECK-NO-R6-NOT: r6 -; CHECK-STATIC: pc} - diff --git a/llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6-modified.ll b/llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6-modified.ll deleted file mode 100644 index e2a4af87dde7e..0000000000000 --- a/llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6-modified.ll +++ /dev/null @@ -1,63 +0,0 @@ -; RUN: llc < %s -mattr=+reserve-r6 -mtriple=arm-linux-gnueabi -O0 -filetype=asm --regalloc=fast 2>&1 | FileCheck %s -; -; Equivalent C source code -; register unsigned r6 asm("r6"); -; void bar(unsigned int i, -; unsigned int j, -; unsigned int k, -; unsigned int l, -; unsigned int m, -; unsigned int n, -; unsigned int o, -; unsigned int p) -; { -; r6 = 10; -; unsigned int result = i + j + k + l 
+ m + n + o + p; -; } -declare void @llvm.write_register.i32(metadata, i32) nounwind - -define void @bar(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) nounwind { -entry: -; CHECK-NOT: push {{{.*}}r6,{{.*}}} -; CHECK: {{.*}}mov{{.*}}r6,{{.*}} -; CHECK-NOT: {{.*}}r6{{.*}} - %i.addr = alloca i32, align 4 - %j.addr = alloca i32, align 4 - %k.addr = alloca i32, align 4 - %l.addr = alloca i32, align 4 - %m.addr = alloca i32, align 4 - %n.addr = alloca i32, align 4 - %o.addr = alloca i32, align 4 - %p.addr = alloca i32, align 4 - %result = alloca i32, align 4 - store i32 %i, i32* %i.addr, align 4 - store i32 %j, i32* %j.addr, align 4 - store i32 %k, i32* %k.addr, align 4 - store i32 %l, i32* %l.addr, align 4 - store i32 %m, i32* %m.addr, align 4 - store i32 %n, i32* %n.addr, align 4 - store i32 %o, i32* %o.addr, align 4 - store i32 %p, i32* %p.addr, align 4 - call void @llvm.write_register.i32(metadata !0, i32 10) - %0 = load i32, i32* %i.addr, align 4 - %1 = load i32, i32* %j.addr, align 4 - %add = add i32 %0, %1 - %2 = load i32, i32* %k.addr, align 4 - %add1 = add i32 %add, %2 - %3 = load i32, i32* %l.addr, align 4 - %add2 = add i32 %add1, %3 - %4 = load i32, i32* %m.addr, align 4 - %add3 = add i32 %add2, %4 - %5 = load i32, i32* %n.addr, align 4 - %add4 = add i32 %add3, %5 - %6 = load i32, i32* %o.addr, align 4 - %add5 = add i32 %add4, %6 - %7 = load i32, i32* %p.addr, align 4 - %add6 = add i32 %add5, %7 - store i32 %add6, i32* %result, align 4 - ret void -} - -!llvm.named.register.r6 = !{!0} -!0 = !{!"r6"} - diff --git a/llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6.ll b/llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6.ll deleted file mode 100644 index 3647c0701a7c3..0000000000000 --- a/llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6.ll +++ /dev/null @@ -1,57 +0,0 @@ -; RUN: llc < %s -mattr=+reserve-r6 -mtriple=arm-linux-gnueabi -O0 -filetype=asm --regalloc=fast 2>&1 | FileCheck %s -; -; Equivalent C source code -; void bar(unsigned int i, -; 
unsigned int j, -; unsigned int k, -; unsigned int l, -; unsigned int m, -; unsigned int n, -; unsigned int o, -; unsigned int p) -; { -; unsigned int result = i + j + k + l + m + n + o + p; -; } - -define void @bar(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) nounwind { -entry: -; CHECK-NOT: push {{{.*}}r6,{{.*}}} - %i.addr = alloca i32, align 4 - %j.addr = alloca i32, align 4 - %k.addr = alloca i32, align 4 - %l.addr = alloca i32, align 4 - %m.addr = alloca i32, align 4 - %n.addr = alloca i32, align 4 - %o.addr = alloca i32, align 4 - %p.addr = alloca i32, align 4 - %result = alloca i32, align 4 - store i32 %i, i32* %i.addr, align 4 - store i32 %j, i32* %j.addr, align 4 - store i32 %k, i32* %k.addr, align 4 - store i32 %l, i32* %l.addr, align 4 - store i32 %m, i32* %m.addr, align 4 - store i32 %n, i32* %n.addr, align 4 - store i32 %o, i32* %o.addr, align 4 - store i32 %p, i32* %p.addr, align 4 - %0 = load i32, i32* %i.addr, align 4 - %1 = load i32, i32* %j.addr, align 4 - %add = add i32 %0, %1 - %2 = load i32, i32* %k.addr, align 4 - %add1 = add i32 %add, %2 - %3 = load i32, i32* %l.addr, align 4 - %add2 = add i32 %add1, %3 - %4 = load i32, i32* %m.addr, align 4 - %add3 = add i32 %add2, %4 - %5 = load i32, i32* %n.addr, align 4 - %add4 = add i32 %add3, %5 - %6 = load i32, i32* %o.addr, align 4 - %add5 = add i32 %add4, %6 - %7 = load i32, i32* %p.addr, align 4 - %add6 = add i32 %add5, %7 - store i32 %add6, i32* %result, align 4 -; CHECK: {{.*}}r5{{.*}} -; CHECK-NOT: {{.*}}r6{{.*}} - ret void -; CHECK-NOT: pop {{{.*}}r6,{{.*}}} -} - diff --git a/llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll b/llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll deleted file mode 100644 index d1f020936a3d6..0000000000000 --- a/llvm/test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll +++ /dev/null @@ -1,58 +0,0 @@ -; RUN: llc < %s -mtriple=arm-linux-gnueabi -O0 -filetype=asm --regalloc=fast 2>&1 | FileCheck %s -; -; Equivalent C source code -; void bar(unsigned int i, 
-; unsigned int j, -; unsigned int k, -; unsigned int l, -; unsigned int m, -; unsigned int n, -; unsigned int o, -; unsigned int p) -; { -; unsigned int result = i + j + k + l + m + n + o + p; -; } - -define void @bar(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) nounwind { -entry: -; CHECK: push {{{.*}}r4, r5{{.*}}} - %i.addr = alloca i32, align 4 - %j.addr = alloca i32, align 4 - %k.addr = alloca i32, align 4 - %l.addr = alloca i32, align 4 - %m.addr = alloca i32, align 4 - %n.addr = alloca i32, align 4 - %o.addr = alloca i32, align 4 - %p.addr = alloca i32, align 4 - %result = alloca i32, align 4 - store i32 %i, i32* %i.addr, align 4 - store i32 %j, i32* %j.addr, align 4 - store i32 %k, i32* %k.addr, align 4 - store i32 %l, i32* %l.addr, align 4 - store i32 %m, i32* %m.addr, align 4 - store i32 %n, i32* %n.addr, align 4 - store i32 %o, i32* %o.addr, align 4 - store i32 %p, i32* %p.addr, align 4 - %0 = load i32, i32* %i.addr, align 4 - %1 = load i32, i32* %j.addr, align 4 - %add = add i32 %0, %1 - %2 = load i32, i32* %k.addr, align 4 - %add1 = add i32 %add, %2 - %3 = load i32, i32* %l.addr, align 4 - %add2 = add i32 %add1, %3 - %4 = load i32, i32* %m.addr, align 4 - %add3 = add i32 %add2, %4 - %5 = load i32, i32* %n.addr, align 4 - %add4 = add i32 %add3, %5 - %6 = load i32, i32* %o.addr, align 4 - %add5 = add i32 %add4, %6 - %7 = load i32, i32* %p.addr, align 4 - %add6 = add i32 %add5, %7 - store i32 %add6, i32* %result, align 4 -; CHECK: {{.*}}r4{{.*}} -; CHECK: {{.*}}r5{{.*}} - -; CHECK: pop {{{.*}}r4, r5{{.*}}} - ret void -} - diff --git a/llvm/test/CodeGen/Thumb/callee_save_reserved.ll b/llvm/test/CodeGen/Thumb/callee_save_reserved.ll deleted file mode 100644 index 0329d7886a2a9..0000000000000 --- a/llvm/test/CodeGen/Thumb/callee_save_reserved.ll +++ /dev/null @@ -1,15 +0,0 @@ -; RUN: llc < %s -mtriple=thumbv6m-none-eabi -verify-machineinstrs -frame-pointer=none -mattr=+reserve-r6,+reserve-r8 \ -; RUN: -asm-verbose=false | FileCheck 
--check-prefix=CHECK-INVALID %s - -; Reserved low registers should not be used to correct reg deficit. -define <4 x i32> @four_high_four_return_reserved() { -entry: - ; CHECK-INVALID-NOT: r{{6|8}} - tail call void asm sideeffect "", "~{r8},~{r9}"() - %vecinit = insertelement <4 x i32> undef, i32 1, i32 0 - %vecinit11 = insertelement <4 x i32> %vecinit, i32 2, i32 1 - %vecinit12 = insertelement <4 x i32> %vecinit11, i32 3, i32 2 - %vecinit13 = insertelement <4 x i32> %vecinit12, i32 4, i32 3 - ret <4 x i32> %vecinit13 -} - diff --git a/llvm/test/Feature/reserve_global_reg.ll b/llvm/test/Feature/reserve_global_reg.ll deleted file mode 100644 index 06081cae1fb2f..0000000000000 --- a/llvm/test/Feature/reserve_global_reg.ll +++ /dev/null @@ -1,29 +0,0 @@ -; RUN: not llc < %s -mtriple=thumbv7-apple-darwin -mattr=+reserve-r7 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP7 %s -; RUN: not llc < %s -mtriple=armv7-windows-msvc -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11 %s -; RUN: not llc < %s -mtriple=thumbv7-windows -mattr=+reserve-r11 -o - 2>&1 | FileCheck -check-prefix=CHECK-RESERVE-FP11-2 %s - -; int test(int a, int b, int c) { -; return a + b + c; -; } - -; Function Attrs: noinline nounwind optnone -define hidden i32 @_Z4testiii(i32 %a, i32 %b, i32 %c) #0 { -entry: - %a.addr = alloca i32, align 4 - %b.addr = alloca i32, align 4 - %c.addr = alloca i32, align 4 - store i32 %a, i32* %a.addr, align 4 - store i32 %b, i32* %b.addr, align 4 - store i32 %c, i32* %c.addr, align 4 - %0 = load i32, i32* %a.addr, align 4 - %1 = load i32, i32* %b.addr, align 4 - %add = add nsw i32 %0, %1 - %2 = load i32, i32* %c.addr, align 4 - %add1 = add nsw i32 %add, %2 - ret i32 %add1 -} - -; CHECK-RESERVE-FP7: Register r7 has been specified but is used as the frame pointer for this target. -; CHECK-RESERVE-FP11: Register r11 has been specified but is used as the frame pointer for this target. 
-; CHECK-RESERVE-FP11-2: Register r11 has been specified but is used as the frame pointer for this target. - From 26ab827c24c8dcebebad136c0580cae5fdc84c9f Mon Sep 17 00:00:00 2001 From: Sean Fertile Date: Fri, 29 Nov 2019 12:44:56 -0500 Subject: [PATCH 233/591] [PowerPC][AIX] Add support for lowering int/float/double formal arguments. This patch adds LowerFormalArguments_AIX, support is added for lowering int, float, and double formal arguments into general purpose and floating point registers only. The aix calling convention testcase have been redone to test for caller and callee functionality in the same lit test. Patch by Zarko Todorovski! Differential Revision: https://reviews.llvm.org/D69578 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 118 +++- llvm/lib/Target/PowerPC/PPCISelLowering.h | 4 + llvm/test/CodeGen/PowerPC/aix_cc_abi.ll | 614 ++++++++++++++++++++ llvm/test/CodeGen/PowerPC/aix_fpr_param.ll | 150 ----- llvm/test/CodeGen/PowerPC/aix_gpr_param.ll | 199 ------- 5 files changed, 733 insertions(+), 352 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/aix_cc_abi.ll delete mode 100644 llvm/test/CodeGen/PowerPC/aix_fpr_param.ll delete mode 100644 llvm/test/CodeGen/PowerPC/aix_gpr_param.ll diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 8730c88f43531..a4f662dfdddb6 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -3418,15 +3418,16 @@ SDValue PPCTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { + if (Subtarget.isAIXABI()) + return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG, + InVals); if (Subtarget.is64BitELFABI()) return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); - else if (Subtarget.is32BitELFABI()) + if (Subtarget.is32BitELFABI()) 
return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); - // FIXME: We are using this for both AIX and Darwin. We should add appropriate - // AIX testing, and rename it appropriately. return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); } @@ -6808,6 +6809,117 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, } } +static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT, + bool IsPPC64) { + assert((IsPPC64 || SVT != MVT::i64) && + "i64 should have been split for 32-bit codegen."); + + switch (SVT) { + default: + report_fatal_error("Unexpected value type for formal argument"); + case MVT::i1: + case MVT::i32: + case MVT::i64: + return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; + case MVT::f32: + return &PPC::F4RCRegClass; + case MVT::f64: + return &PPC::F8RCRegClass; + } +} + +static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, + SelectionDAG &DAG, SDValue ArgValue, + MVT LocVT, const SDLoc &dl) { + assert(ValVT.isScalarInteger() && LocVT.isScalarInteger()); + assert(ValVT.getSizeInBits() < LocVT.getSizeInBits()); + + if (Flags.isSExt()) + ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue, + DAG.getValueType(ValVT)); + else if (Flags.isZExt()) + ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue, + DAG.getValueType(ValVT)); + + return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue); +} + +SDValue PPCTargetLowering::LowerFormalArguments_AIX( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl &InVals) const { + + assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold || + CallConv == CallingConv::Fast) && + "Unexpected calling convention!"); + + if (isVarArg) + report_fatal_error("This call type is unimplemented on AIX."); + + if (getTargetMachine().Options.GuaranteedTailCallOpt) + report_fatal_error("Tail call support is 
unimplemented on AIX."); + + if (useSoftFloat()) + report_fatal_error("Soft float support is unimplemented on AIX."); + + const PPCSubtarget &Subtarget = + static_cast(DAG.getSubtarget()); + if (Subtarget.hasQPX()) + report_fatal_error("QPX support is not supported on AIX."); + + const bool IsPPC64 = Subtarget.isPPC64(); + const unsigned PtrByteSize = IsPPC64 ? 8 : 4; + + // Assign locations to all of the incoming arguments. + SmallVector ArgLocs; + MachineFunction &MF = DAG.getMachineFunction(); + CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); + + // Reserve space for the linkage area on the stack. + const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); + // On AIX a minimum of 8 words is saved to the parameter save area. + const unsigned MinParameterSaveArea = 8 * PtrByteSize; + CCInfo.AllocateStack(LinkageSize + MinParameterSaveArea, PtrByteSize); + CCInfo.AnalyzeFormalArguments(Ins, CC_AIX); + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + SDValue ArgValue; + ISD::ArgFlagsTy Flags = Ins[i].Flags; + if (VA.isRegLoc()) { + EVT ValVT = VA.getValVT(); + MVT LocVT = VA.getLocVT(); + MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy; + unsigned VReg = + MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64)); + ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT); + if (ValVT.isScalarInteger() && + (ValVT.getSizeInBits() < LocVT.getSizeInBits())) { + ArgValue = + truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl); + } + InVals.push_back(ArgValue); + } else { + report_fatal_error("Handling of formal arguments on the stack is " + "unimplemented!"); + } + } + + // Area that is at least reserved in the caller of this function. + unsigned MinReservedArea = CCInfo.getNextStackOffset(); + + // Set the size that is at least reserved in caller of this function. 
Tail + // call optimized function's reserved stack space needs to be aligned so + // that taking the difference between two stack areas will result in an + // aligned stack. + MinReservedArea = + EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); + PPCFunctionInfo *FuncInfo = MF.getInfo(); + FuncInfo->setMinReservedArea(MinReservedArea); + + return Chain; +} + SDValue PPCTargetLowering::LowerCall_AIX( SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, bool isPatchPoint, diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 77b19b2634669..612d1c6b3f26e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1121,6 +1121,10 @@ namespace llvm { SelectionDAG &DAG, SDValue ArgVal, const SDLoc &dl) const; + SDValue LowerFormalArguments_AIX( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl &InVals) const; SDValue LowerFormalArguments_Darwin( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, diff --git a/llvm/test/CodeGen/PowerPC/aix_cc_abi.ll b/llvm/test/CodeGen/PowerPC/aix_cc_abi.ll new file mode 100644 index 0000000000000..b15b63b166f70 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix_cc_abi.ll @@ -0,0 +1,614 @@ +; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \ +; RUN: FileCheck --check-prefixes=CHECK,32BIT %s + +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \ +; RUN: FileCheck --check-prefixes=CHECK,64BIT %s + +define void @call_test_chars() { +entry: + call i8 @test_chars(i8 signext 97, i8 signext 97, i8 signext 97, i8 signext 97) + ret void +} + +; CHECK-LABEL: name: call_test_chars + +; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT: $r3 = LI 97 
+; 32BIT: $r4 = LI 97 +; 32BIT: $r5 = LI 97 +; 32BIT: $r6 = LI 97 +; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit $r2, implicit-def $r1 +; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT: $x3 = LI8 97 +; 64BIT: $x4 = LI8 97 +; 64BIT: $x5 = LI8 97 +; 64BIT: $x6 = LI8 97 +; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit $x2, implicit-def $r1 +; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +define signext i8 @test_chars(i8 signext %c1, i8 signext %c2, i8 signext %c3, i8 signext %c4) { +entry: + %conv = sext i8 %c1 to i32 + %conv1 = sext i8 %c2 to i32 + %add = add nsw i32 %conv, %conv1 + %conv2 = sext i8 %c3 to i32 + %add3 = add nsw i32 %add, %conv2 + %conv4 = sext i8 %c4 to i32 + %add5 = add nsw i32 %add3, %conv4 + %conv6 = trunc i32 %add5 to i8 + ret i8 %conv6 +} + +; CHECK-LABEL: name: test_chars + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r4', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r5', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r6', virtual-reg: '' } +; 32BIT: body: +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6 + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x4', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x5', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x6', virtual-reg: '' } +; 64BIT: body: +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6 + +define void @call_test_chars_mix() { +entry: + call i8 @test_chars_mix(i8 signext 97, i8 zeroext -31, i8 zeroext 97, i8 signext -31) + ret void +} + +; CHECK-LABEL: name: call_test_chars_mix + +; 32BIT: ADJCALLSTACKDOWN 56, 0, 
implicit-def dead $r1, implicit $r1 +; 32BIT: $r3 = LI 97 +; 32BIT: $r4 = LI 225 +; 32BIT: $r5 = LI 97 +; 32BIT: $r6 = LI -31 +; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit $r2, implicit-def $r1 +; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT: $x3 = LI8 97 +; 64BIT: $x4 = LI8 225 +; 64BIT: $x5 = LI8 97 +; 64BIT: $x6 = LI8 -31 +; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit $x2, implicit-def $r1 +; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +define signext i8 @test_chars_mix(i8 signext %c1, i8 zeroext %c2, i8 zeroext %c3, i8 signext %c4) { +entry: + %conv = sext i8 %c1 to i32 + %conv1 = zext i8 %c2 to i32 + %add = add nsw i32 %conv, %conv1 + %conv2 = zext i8 %c3 to i32 + %add3 = add nsw i32 %add, %conv2 + %conv4 = sext i8 %c4 to i32 + %add5 = add nsw i32 %add3, %conv4 + %conv6 = trunc i32 %add5 to i8 + ret i8 %conv6 +} + +; CHECK-LABEL: name: test_chars_mix + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r4', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r5', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r6', virtual-reg: '' } +; 32BIT: body: +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6 + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x4', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x5', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x6', virtual-reg: '' } +; 64BIT: body: +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6 + +@global_i1 = global i8 0, align 1 + +define void @test_i1(i1 %b) { + entry: + %frombool = zext i1 %b to i8 + store i8 %frombool, i8* @global_i1, align 1 + ret void +} 
+ +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT: body: | +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $r3 +; 32BIT: renamable $r3 = RLWINM killed renamable $r3, 0, 31, 31 +; 32BIT-NEXT: STB killed renamable $r3, 0, killed renamable $r4 :: (store 1 into @global_i1) + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT: body: | +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3 +; 64BIT: renamable $r[[REG1:[0-9]+]] = RLWINM renamable $r[[REG1]], 0, 31, 31, implicit killed $x3 +; 64BIT-NEXT: STB killed renamable $r[[REG1]], 0, killed renamable $x4 :: (store 1 into @global_i1) + +define void @call_test_i1() { +entry: + call void @test_i1(i1 1) + ret void +} + +; CHECK-LABEL: name: call_test_i1 + +; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT: $r3 = LI 1 +; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit $r2, implicit-def $r1 +; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT: $x3 = LI8 1 +; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1 +; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +define void @test_i1zext(i1 zeroext %b) { + entry: + %frombool = zext i1 %b to i8 + store i8 %frombool, i8 * @global_i1, align 1 + ret void + } + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT: body: | +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $r3 +; CHECK-NOT: RLWINM +; 32BIT: STB killed renamable $r3, 0, killed renamable $r4 :: (store 1 into @global_i1) + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT: body: | +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3 +; CHECK-NOT: RLWINM +; 64BIT: STB8 killed renamable $x3, 0, killed renamable $x4 :: (store 1 into @global_i1) + +define i32 
@test_ints(i32 signext %a, i32 zeroext %b, i32 zeroext %c, i32 signext %d, i32 signext %e, i32 signext %f, i32 signext %g, i32 signext %h) { +entry: + %add = add i32 %a, %b + %add1 = add i32 %add, %c + %add2 = add i32 %add1, %d + %add3 = add i32 %add2, %e + %add4 = add i32 %add3, %f + %add5 = add i32 %add4, %g + %add6 = add i32 %add5, %h + ret i32 %add6 +} + +; CHECK-LABEL: name: test_ints + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r4', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r5', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r6', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r7', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r8', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r9', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r10', virtual-reg: '' } +; 32BIT: body: | +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10 + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x4', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x5', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x6', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x7', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x8', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x9', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x10', virtual-reg: '' } +; 64BIT: body: | +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10 + +define void @call_test_ints() { +entry: + call i32 @test_ints(i32 signext 1, i32 zeroext 1, i32 zeroext 2147483648, i32 signext -2147483648, i32 signext 1, i32 signext 1, i32 signext 1, i32 signext 1) + ret void +} + +; CHECK-LABEL: name: call_test_ints + +; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT: renamable $x3 = LI8 1 +; 64BIT: renamable $x5 = RLDICR killed renamable $x3, 31, 32 +; 64BIT: $x3 = LI8 1 +; 64BIT: $x4 = LI8 1 +; 64BIT: $x6 = LIS8 32768 +; 64BIT: $x7 = LI8 1 +; 64BIT: $x8 = LI8 1 +; 64BIT: $x9 = LI8 1 
+; 64BIT: $x10 = LI8 1 +; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit killed $x4, implicit $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit killed $x9, implicit killed $x10, implicit $x2, implicit-def $r1, implicit-def dead $x3 +; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +define void @call_test_i64() { +entry: + call i64 @test_i64(i64 1, i64 2, i64 3, i64 4) + ret void +} + + +; CHECK-LABEL: name: call_test_i64 + +; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT: $r3 = LI 0 +; 32BIT: $r4 = LI 1 +; 32BIT: $r5 = LI 0 +; 32BIT: $r6 = LI 2 +; 32BIT: $r7 = LI 0 +; 32BIT: $r8 = LI 3 +; 32BIT: $r9 = LI 0 +; 32BIT: $r10 = LI 4 +; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit killed $r9, implicit killed $r10, implicit $r2, implicit-def $r1 +; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT: $x3 = LI8 1 +; 64BIT: $x4 = LI8 2 +; 64BIT: $x5 = LI8 3 +; 64BIT: $x6 = LI8 4 +; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit $x2, implicit-def $r1 +; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +define i64 @test_i64(i64 %a, i64 %b, i64 %c, i64 %d) { +entry: + %add = add nsw i64 %a, %b + %add1 = add nsw i64 %add, %c + %add2 = add nsw i64 %add1, %d + ret i64 %add2 +} + +; CHECK-LABEL: name: test_i64 + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r4', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r5', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r6', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r7', virtual-reg: '' } +; 32BIT-NEXT: - { 
reg: '$r8', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r9', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r10', virtual-reg: '' } +; 32BIT: body: | +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10 + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x4', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x5', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x6', virtual-reg: '' } +; 64BIT: body: | +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6 + +define void @call_test_int_ptr() { +entry: + %b = alloca i32, align 4 + store i32 0, i32* %b, align 4 + call void @test_int_ptr(i32* %b) + ret void +} + +; CHECK-LABEL: name: call_test_int_ptr + +; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT: renamable $r3 = ADDI %stack.0.b, 0 +; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1 +; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT: renamable $x3 = ADDI8 %stack.0.b, 0 +; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1 +; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +define void @test_int_ptr(i32* %a) { +entry: + %a.addr = alloca i32*, align 8 + store i32* %a, i32** %a.addr, align 8 + ret void +} + +; CHECK-LABEL: name: test_int_ptr + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT: body: | +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $r3 +; 32BIT: STW killed renamable $r3, 0, %stack.0.a.addr :: (store 4 into %ir.a.addr, align 8) + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT: body: | +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3 +; 64BIT: STD killed renamable $x3, 0, %stack.0.a.addr :: (store 8 into %ir.a.addr) + + +define i32 
@caller(i32 %i) { +entry: + %i.addr = alloca i32, align 4 + %b = alloca i8, align 1 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 + %cmp = icmp ne i32 %0, 0 + %frombool = zext i1 %cmp to i8 + store i8 %frombool, i8* %b, align 1 + %1 = load i8, i8* %b, align 1 + %tobool = trunc i8 %1 to i1 + %call = call i32 @call_test_bool(i1 zeroext %tobool) + ret i32 %call +} + +declare i32 @call_test_bool(i1 zeroext) + +; CHECK-LABEL: name: caller + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT: body: | +; 32BIT-NEXT: bb.0.entry: +; 32BIT: liveins: $r3 +; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1, implicit-def $r3 +; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT: body: | +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3 +; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1, implicit-def $x3 +; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +@f1 = global float 0.000000e+00, align 4 +@d1 = global double 0.000000e+00, align 8 + +define void @call_test_floats() { +entry: + %0 = load float, float* @f1, align 4 + call float @test_floats(float %0, float %0, float %0) + ret void +} + +; CHECK-LABEL: name: call_test_floats{{.*}} + +; 32BIT: renamable $r3 = LWZtoc @f1, $r2 :: (load 4 from got) +; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r3 :: (dereferenceable load 4 from @f1) +; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT-NEXT: $f2 = COPY renamable $f1 +; 32BIT-NEXT: $f3 = COPY renamable $f1 +; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit killed 
$f2, implicit killed $f3, implicit $r2, implicit-def $r1 +; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; 64BIT: renamable $x3 = LDtoc @f1, $x2 :: (load 8 from got) +; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x3 :: (dereferenceable load 4 from @f1) +; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT-NEXT: $f2 = COPY renamable $f1 +; 64BIT-NEXT: $f3 = COPY renamable $f1 +; 64BIT-NEXT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit $x2, implicit-def $r1 +; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +define float @test_floats(float %f1, float %f2, float %f3) { +entry: + %add = fadd float %f1, %f2 + %add1 = fadd float %add, %f3 + ret float %add1 +} + +; CHECK-LABEL: name: test_floats{{.*}} + +; CHECK: liveins: +; CHECK-NEXT: - { reg: '$f1', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f2', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f3', virtual-reg: '' } +; CHECK: body: | +; CHECK-NEXT: bb.0.entry: +; CHECK-NEXT: liveins: $f1, $f2, $f3 + +define void @call_test_fpr_max() { +entry: + %0 = load double, double* @d1, align 8 + call double @test_fpr_max(double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0) + ret void +} + +; CHECK-LABEL: name: call_test_fpr_max{{.*}} + +; 32BIT: renamable $r3 = LWZtoc @d1, $r2 :: (load 4 from got) +; 32BIT-NEXT: renamable $f1 = LFD 0, killed renamable $r3 :: (dereferenceable load 8 from @d1) +; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT-NEXT: $f2 = COPY renamable $f1 +; 32BIT-NEXT: $f3 = COPY renamable $f1 +; 32BIT-NEXT: $f4 = COPY renamable $f1 +; 32BIT-NEXT: $f5 = COPY renamable $f1 +; 32BIT-NEXT: $f6 = COPY renamable $f1 +; 32BIT-NEXT: $f7 = COPY renamable $f1 +; 32BIT-NEXT: $f8 = COPY renamable $f1 +; 32BIT-NEXT: $f9 = COPY renamable 
$f1 +; 32BIT-NEXT: $f10 = COPY renamable $f1 +; 32BIT-NEXT: $f11 = COPY renamable $f1 +; 32BIT-NEXT: $f12 = COPY renamable $f1 +; 32BIT-NEXT: $f13 = COPY renamable $f1 +; 32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit killed $f4, implicit killed $f5, implicit killed $f6, implicit killed $f7, implicit killed $f8, implicit killed $f9, implicit killed $f10, implicit killed $f11, implicit killed $f12, implicit killed $f13, implicit $r2, implicit-def $r1 +; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; 64BIT: renamable $x3 = LDtoc @d1, $x2 :: (load 8 from got) +; 64BIT-NEXT: renamable $f1 = LFD 0, killed renamable $x3 :: (dereferenceable load 8 from @d1) +; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT-NEXT: $f2 = COPY renamable $f1 +; 64BIT-NEXT: $f3 = COPY renamable $f1 +; 64BIT-NEXT: $f4 = COPY renamable $f1 +; 64BIT-NEXT: $f5 = COPY renamable $f1 +; 64BIT-NEXT: $f6 = COPY renamable $f1 +; 64BIT-NEXT: $f7 = COPY renamable $f1 +; 64BIT-NEXT: $f8 = COPY renamable $f1 +; 64BIT-NEXT: $f9 = COPY renamable $f1 +; 64BIT-NEXT: $f10 = COPY renamable $f1 +; 64BIT-NEXT: $f11 = COPY renamable $f1 +; 64BIT-NEXT: $f12 = COPY renamable $f1 +; 64BIT-NEXT: $f13 = COPY renamable $f1 +; 64BIT-NEXT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit killed $f4, implicit killed $f5, implicit killed $f6, implicit killed $f7, implicit killed $f8, implicit killed $f9, implicit killed $f10, implicit killed $f11, implicit killed $f12, implicit killed $f13, implicit $x2, implicit-def $r1 +; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +define double @test_fpr_max(double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10, double %d11, double %d12, double %d13) { +entry: + %add = fadd 
double %d1, %d2 + %add1 = fadd double %add, %d3 + %add2 = fadd double %add1, %d4 + %add3 = fadd double %add2, %d5 + %add4 = fadd double %add3, %d6 + %add5 = fadd double %add4, %d7 + %add6 = fadd double %add5, %d8 + %add7 = fadd double %add6, %d9 + %add8 = fadd double %add7, %d10 + %add9 = fadd double %add8, %d11 + %add10 = fadd double %add9, %d12 + %add11 = fadd double %add10, %d13 + ret double %add11 +} + +; CHECK-LABEL: name: test_fpr_max{{.*}} + +; CHECK: liveins: +; CHECK-NEXT: - { reg: '$f1', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f2', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f3', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f4', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f5', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f6', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f7', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f8', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f9', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f10', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f11', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f12', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f13', virtual-reg: '' } +; CHECK: body: | +; CHECK-NEXT: bb.0.entry: +; CHECK-NEXT: liveins: $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10, $f11, $f12, $f13 + +define void @call_test_mix() { +entry: + %0 = load float, float* @f1, align 4 + %1 = load double, double* @d1, align 8 + call i32 @test_mix(float %0, i32 1, double %1, i8 signext 97) + ret void +} + +; CHECK-LABEL: name: call_test_mix{{.*}} + +; 32BIT: renamable $r[[REG1:[0-9]+]] = LWZtoc @f1, $r2 :: (load 4 from got) +; 32BIT-NEXT: renamable $r[[REG2:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got) +; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG1]] :: (dereferenceable load 4 from @f1) +; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG2]] :: (dereferenceable load 8 from @d1) +; 32BIT-NEXT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT-NEXT: $r4 = LI 1 +; 32BIT-NEXT: $r7 = LI 97 +; 
32BIT-NEXT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit $r4, implicit $f2, implicit killed $r7, implicit $r2, implicit-def $r1 +; 32BIT-NEXT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; 64BIT: renamable $x[[REG1:[0-9]+]] = LDtoc @f1, $x2 :: (load 8 from got) +; 64BIT-NEXT: renamable $x[[REG2:[0-9]+]] = LDtoc @d1, $x2 :: (load 8 from got) +; 64BIT-NEXT: renamable $f1 = LFS 0, killed renamable $x[[REG1]] :: (dereferenceable load 4 from @f1) +; 64BIT-NEXT: renamable $f2 = LFD 0, killed renamable $x[[REG2]] :: (dereferenceable load 8 from @d1) +; 64BIT-NEXT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT-NEXT: $x4 = LI8 1 +; 64BIT-NEXT: $x6 = LI8 97 +; 64BIT-NEXT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit $x4, implicit $f2, implicit killed $x6, implicit $x2, implicit-def $r1 +; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +define i32 @test_mix(float %f, i32 signext %i, double %d, i8 signext %c) { +entry: + %conv = fpext float %f to double + %add = fadd double %conv, %d + %conv1 = fptrunc double %add to float + %conv2 = zext i8 %c to i32 + %add3 = add nsw i32 %i, %conv2 + %conv4 = sitofp i32 %add3 to float + %add5 = fadd float %conv4, %conv1 + %conv6 = fptosi float %add5 to i32 + ret i32 %conv6 +} + +; CHECK-LABEL: name: test_mix{{.*}} + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$f1', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r4', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$f2', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r7', virtual-reg: '' } +; 32BIT: body: | +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $f1, $f2, $r4, $r7 + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$f1', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x4', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$f2', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x6', virtual-reg: '' } +; 64BIT: body: | +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $f1, $f2, 
$x4, $x6 + + +define i64 @callee_mixed_ints(i32 %a, i8 signext %b, i32 %c, i16 signext %d, i64 %e) { +entry: + %conv = zext i8 %b to i32 + %add = add nsw i32 %a, %conv + %add1 = add nsw i32 %add, %c + %conv2 = sext i16 %d to i32 + %add3 = add nsw i32 %add1, %conv2 + %conv4 = sext i32 %add3 to i64 + %add5 = add nsw i64 %conv4, %e + ret i64 %add5 + } + +; CHECK-LABEL: name: callee_mixed_ints + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r4', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r5', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r6', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r7', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r8', virtual-reg: '' } +; 32BIT: body: | +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8 + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x4', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x5', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x6', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x7', virtual-reg: '' } +; 64BIT: body: | +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6, $x7 diff --git a/llvm/test/CodeGen/PowerPC/aix_fpr_param.ll b/llvm/test/CodeGen/PowerPC/aix_fpr_param.ll deleted file mode 100644 index f92096f3ab7d3..0000000000000 --- a/llvm/test/CodeGen/PowerPC/aix_fpr_param.ll +++ /dev/null @@ -1,150 +0,0 @@ -; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp < %s | \ -; RUN: FileCheck --check-prefix=32BIT %s - -; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp < %s | \ -; RUN: FileCheck --check-prefix=64BIT %s - -@f1 = global float 0.000000e+00, align 4 -@d1 = global double 0.000000e+00, align 8 - -define void @call_test_float() { -entry: -; 32BIT: renamable $r3 = LWZtoc @f1, $r2 :: (load 4 from got) -; 32BIT: renamable $f1 = LFS 0, killed renamable $r3 :: (dereferenceable load 4 from @f1) -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 
-; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: renamable $x3 = LDtoc @f1, $x2 :: (load 8 from got) -; 64BIT: renamable $f1 = LFS 0, killed renamable $x3 :: (dereferenceable load 4 from @f1) -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - %0 = load float, float* @f1, align 4 - call void @test_float(float %0) - ret void -} - -declare void @test_float(float) - -define void @call_test_floats() { -entry: -; 32BIT: renamable $r3 = LWZtoc @f1, $r2 :: (load 4 from got) -; 32BIT: renamable $f1 = LFS 0, killed renamable $r3 :: (dereferenceable load 4 from @f1) -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: $f2 = COPY renamable $f1 -; 32BIT: $f3 = COPY renamable $f1 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: renamable $x3 = LDtoc @f1, $x2 :: (load 8 from got) -; 64BIT: renamable $f1 = LFS 0, killed renamable $x3 :: (dereferenceable load 4 from @f1) -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: $f2 = COPY renamable $f1 -; 64BIT: $f3 = COPY renamable $f1 -; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - %0 = load float, float* @f1, align 4 - call void @test_floats(float %0, float %0, float %0) - ret void -} - -declare void @test_floats(float, float, float) - -define void @call_test_double() { -entry: 
-; 32BIT: renamable $r3 = LWZtoc @d1, $r2 :: (load 4 from got) -; 32BIT: renamable $f1 = LFD 0, killed renamable $r3 :: (dereferenceable load 8 from @d1) -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: renamable $x3 = LDtoc @d1, $x2 :: (load 8 from got) -; 64BIT: renamable $f1 = LFD 0, killed renamable $x3 :: (dereferenceable load 8 from @d1) -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - %0 = load double, double* @d1, align 8 - call void @test_double(double %0) - ret void -} - -declare void @test_double(double) - -define void @call_test_fpr_max() { -entry: -; 32BIT: renamable $r3 = LWZtoc @d1, $r2 :: (load 4 from got) -; 32BIT: renamable $f1 = LFD 0, killed renamable $r3 :: (dereferenceable load 8 from @d1) -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: $f2 = COPY renamable $f1 -; 32BIT: $f3 = COPY renamable $f1 -; 32BIT: $f4 = COPY renamable $f1 -; 32BIT: $f5 = COPY renamable $f1 -; 32BIT: $f6 = COPY renamable $f1 -; 32BIT: $f7 = COPY renamable $f1 -; 32BIT: $f8 = COPY renamable $f1 -; 32BIT: $f9 = COPY renamable $f1 -; 32BIT: $f10 = COPY renamable $f1 -; 32BIT: $f11 = COPY renamable $f1 -; 32BIT: $f12 = COPY renamable $f1 -; 32BIT: $f13 = COPY renamable $f1 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit killed $f4, implicit killed $f5, implicit killed $f6, implicit killed $f7, implicit killed $f8, implicit killed $f9, implicit killed $f10, implicit killed $f11, implicit killed $f12, implicit killed $f13, implicit $r2, 
implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: renamable $x3 = LDtoc @d1, $x2 :: (load 8 from got) -; 64BIT: renamable $f1 = LFD 0, killed renamable $x3 :: (dereferenceable load 8 from @d1) -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: $f2 = COPY renamable $f1 -; 64BIT: $f3 = COPY renamable $f1 -; 64BIT: $f4 = COPY renamable $f1 -; 64BIT: $f5 = COPY renamable $f1 -; 64BIT: $f6 = COPY renamable $f1 -; 64BIT: $f7 = COPY renamable $f1 -; 64BIT: $f8 = COPY renamable $f1 -; 64BIT: $f9 = COPY renamable $f1 -; 64BIT: $f10 = COPY renamable $f1 -; 64BIT: $f11 = COPY renamable $f1 -; 64BIT: $f12 = COPY renamable $f1 -; 64BIT: $f13 = COPY renamable $f1 -; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit killed $f4, implicit killed $f5, implicit killed $f6, implicit killed $f7, implicit killed $f8, implicit killed $f9, implicit killed $f10, implicit killed $f11, implicit killed $f12, implicit killed $f13, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - %0 = load double, double* @d1, align 8 - call void @test_fpr_max(double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0, double %0) - ret void -} - -declare void @test_fpr_max(double, double, double, double, double, double, double, double, double, double, double, double, double) - -define void @call_test_mix() { -entry: -; 32BIT: renamable $r3 = LWZtoc @f1, $r2 :: (load 4 from got) -; 32BIT: renamable $r4 = LWZtoc @d1, $r2 :: (load 4 from got) -; 32BIT: renamable $f1 = LFS 0, killed renamable $r3 :: (dereferenceable load 4 from @f1) -; 32BIT: renamable $f2 = LFD 0, killed renamable $r4 :: (dereferenceable load 8 from @d1) -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: $r4 = LI 1 -; 32BIT: $r7 = LI 97 -; 
32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $f1, implicit $r4, implicit $f2, implicit killed $r7, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: renamable $x3 = LDtoc @f1, $x2 :: (load 8 from got) -; 64BIT: renamable $x4 = LDtoc @d1, $x2 :: (load 8 from got) -; 64BIT: renamable $f1 = LFS 0, killed renamable $x3 :: (dereferenceable load 4 from @f1) -; 64BIT: renamable $f2 = LFD 0, killed renamable $x4 :: (dereferenceable load 8 from @d1) -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: $x4 = LI8 1 -; 64BIT: $x6 = LI8 97 -; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit $x4, implicit $f2, implicit killed $x6, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - %0 = load float, float* @f1, align 4 - %1 = load double, double* @d1, align 8 - call void @test_mix(float %0, i32 1, double %1, i8 signext 97) - ret void -} - -declare void @test_mix(float, i32, double, i8 signext) diff --git a/llvm/test/CodeGen/PowerPC/aix_gpr_param.ll b/llvm/test/CodeGen/PowerPC/aix_gpr_param.ll deleted file mode 100644 index 42b6f886e687d..0000000000000 --- a/llvm/test/CodeGen/PowerPC/aix_gpr_param.ll +++ /dev/null @@ -1,199 +0,0 @@ -; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp < %s | \ -; RUN: FileCheck --check-prefix=32BIT %s - -; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp < %s | \ -; RUN: FileCheck --check-prefix=64BIT %s - -define void @call_test_char() { -entry: -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: $r3 = LI 97 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: $x3 = LI8 97 
-; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - call void @test_char(i8 signext 97) - ret void -} - -define void @call_test_chars() { -entry: -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: $r3 = LI 97 -; 32BIT: $r4 = LI 97 -; 32BIT: $r5 = LI 97 -; 32BIT: $r6 = LI 97 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: $x3 = LI8 97 -; 64BIT: $x4 = LI8 97 -; 64BIT: $x5 = LI8 97 -; 64BIT: $x6 = LI8 97 -; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - call void @test_chars(i8 signext 97, i8 signext 97, i8 signext 97, i8 signext 97) - ret void -} - -define void @call_test_chars_mix() { -entry: -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: $r3 = LI 97 -; 32BIT: $r4 = LI 225 -; 32BIT: $r5 = LI 97 -; 32BIT: $r6 = LI -31 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: $x3 = LI8 97 -; 64BIT: $x4 = LI8 225 -; 64BIT: $x5 = LI8 97 -; 64BIT: $x6 = LI8 -31 -; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, 
implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - call void @test_chars_mix(i8 signext 97, i8 zeroext -31, i8 zeroext 97, i8 signext -31) - ret void -} - -define void @call_test_int() { -entry: -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: $r3 = LI 1 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: $x3 = LI8 1 -; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - call void @test_int(i32 1) - ret void -} - -define void @call_test_ints() { -entry: -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: $r3 = LI 1 -; 32BIT: $r4 = LI 1 -; 32BIT: $r5 = LI 1 -; 32BIT: $r6 = LI 1 -; 32BIT: $r7 = LI 1 -; 32BIT: $r8 = LI 1 -; 32BIT: $r9 = LI 1 -; 32BIT: $r10 = LI 1 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit killed $r9, implicit killed $r10, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: $x3 = LI8 1 -; 64BIT: $x4 = LI8 1 -; 64BIT: $x5 = LI8 1 -; 64BIT: $x6 = LI8 1 -; 64BIT: $x7 = LI8 1 -; 64BIT: $x8 = LI8 1 -; 64BIT: $x9 = LI8 1 -; 64BIT: $x10 = LI8 1 -; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit killed $x9, implicit killed $x10, implicit $x2, implicit-def $r1 -; 
64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - call void @test_ints(i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1) - ret void -} - -define void @call_test_ints_64bit() { -entry: -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: renamable $x3 = LI8 1 -; 64BIT: renamable $x5 = RLDICR killed renamable $x3, 31, 32 -; 64BIT: $x3 = LI8 1 -; 64BIT: $x4 = LI8 1 -; 64BIT: $x6 = LIS8 32768 -; 64BIT: $x7 = LI8 1 -; 64BIT: $x8 = LI8 1 -; 64BIT: $x9 = LI8 1 -; 64BIT: $x10 = LI8 1 -; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit killed $x4, implicit $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit killed $x9, implicit killed $x10, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - call void @test_ints_64bit(i32 signext 1, i32 zeroext 1, i32 zeroext 2147483648, i32 signext -2147483648, i32 signext 1, i32 signext 1, i32 signext 1, i32 signext 1) - ret void -} - -define void @call_test_i1() { -entry: -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: $r3 = LI 1 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: $x3 = LI8 1 -; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - call void @test_i1(i1 1) - ret void -} - -define void @call_test_i64() { -entry: -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: $r3 = LI 0 -; 32BIT: $r4 = LI 1 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit killed $r4, implicit $r2, implicit-def $r1 -; 32BIT: 
ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: $x3 = LI8 1 -; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - call void @test_i64(i64 1) - ret void -} - -define void @call_test_int_ptr() { -entry: - %b = alloca i32, align 4 -; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 -; 32BIT: renamable $r3 = ADDI %stack.0.b, 0 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1 -; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 - -; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 -; 64BIT: renamable $x3 = ADDI8 %stack.0.b, 0 -; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1 -; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - - store i32 0, i32* %b, align 4 - call void @test_int_ptr(i32* %b) - ret void -} - -declare void @test_char(i8 signext) - -declare void @test_chars(i8 signext, i8 signext, i8 signext, i8 signext) - -declare void @test_chars_mix(i8 signext, i8 zeroext, i8 zeroext, i8 signext) - -declare void @test_int(i32) - -declare void @test_ints(i32, i32, i32, i32, i32, i32, i32, i32) - -declare void @test_ints_64bit(i32 signext, i32 zeroext, i32 zeroext, i32 signext, i32 signext, i32 signext, i32 signext, i32 signext) - -declare void @test_i1(i1) - -declare void @test_i64(i64) - -declare void @test_int_ptr(i32*) From 905b002c139f039a32ab9bf1fad63d745d12423f Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Fri, 29 Nov 2019 19:59:02 +0100 Subject: [PATCH 234/591] Revert "[clangd] Rethink how SelectionTree deals with macros and #includes." This reverts commit 19daa21f841ad45290c923689ee3d25198651a4c. 
It causes a bunch of failures on a bot that I've been unable to reproduce so far: http://45.33.8.238/mac/3308/step_7.txt --- clang-tools-extra/clangd/Selection.cpp | 414 ++++++------------ clang-tools-extra/clangd/Selection.h | 2 +- .../clangd/unittests/SelectionTests.cpp | 66 +-- .../clangd/unittests/TweakTests.cpp | 26 +- clang/include/clang/Tooling/Syntax/Tokens.h | 5 - clang/lib/Tooling/Syntax/Tokens.cpp | 16 - clang/unittests/Tooling/Syntax/TokensTest.cpp | 15 - 7 files changed, 153 insertions(+), 391 deletions(-) diff --git a/clang-tools-extra/clangd/Selection.cpp b/clang-tools-extra/clangd/Selection.cpp index 54b182b3c7060..5b29b916b33ce 100644 --- a/clang-tools-extra/clangd/Selection.cpp +++ b/clang-tools-extra/clangd/Selection.cpp @@ -34,283 +34,95 @@ namespace { using Node = SelectionTree::Node; using ast_type_traits::DynTypedNode; -// An IntervalSet maintains a set of disjoint subranges of an array. -// -// Initially, it contains the entire array. -// [-----------------------------------------------------------] -// -// When a range is erased(), it will typically split the array in two. -// Claim: [--------------------] -// after: [----------------] [-------------------] -// -// erase() returns the segments actually erased. Given the state above: -// Claim: [---------------------------------------] -// Out: [---------] [------] -// After: [-----] [-----------] -// -// It is used to track (expanded) tokens not yet associated with an AST node. -// On traversing an AST node, its token range is erased from the unclaimed set. -// The tokens actually removed are associated with that node, and hit-tested -// against the selection to determine whether the node is selected. -template -class IntervalSet { -public: - IntervalSet(llvm::ArrayRef Range) : UnclaimedRanges(&rangeLess) { - UnclaimedRanges.insert(Range); - } - - // Removes the elements of Claim from the set, modifying or removing ranges - // that overlap it. 
- // Returns the continuous subranges of Claim that were actually removed. - llvm::SmallVector, 4> erase(llvm::ArrayRef Claim) { - llvm::SmallVector, 4> Out; - if (Claim.empty()) - return Out; - // equal_range finds overlapping ranges, because of how we chose <. - auto Overlap = UnclaimedRanges.equal_range(Claim); - if (Overlap.first == Overlap.second) - return Out; - - // General case: - // Claim: [-----------------] - // UnclaimedRanges: [-A-] [-B-] [-C-] [-D-] [-E-] [-F-] [-G-] - // Overlap: ^first ^second - // Ranges C and D are fully included. Ranges B and E must be trimmed. - - // First, copy all overlapping ranges into the output. - auto OutFirst = Out.insert(Out.end(), Overlap.first, Overlap.second); - // If any of the overlapping ranges were sliced by the claim, split them: - // - restrict the returned range to the claimed part - // - save the unclaimed part so it can be reinserted - llvm::ArrayRef RemainingHead, RemainingTail; - if (Claim.begin() > OutFirst->begin()) { - RemainingHead = {OutFirst->begin(), Claim.begin()}; - *OutFirst = {Claim.begin(), OutFirst->end()}; - } - if (Claim.end() < Out.back().end()) { - RemainingTail = {Claim.end(), Out.back().end()}; - Out.back() = {Out.back().begin(), Claim.end()}; - } - - // Erase all the overlapping ranges (invalidating all iterators). - UnclaimedRanges.erase(Overlap.first, Overlap.second); - // Reinsert ranges that were merely trimmed. - if (!RemainingHead.empty()) - UnclaimedRanges.insert(RemainingHead); - if (!RemainingTail.empty()) - UnclaimedRanges.insert(RemainingTail); - - return Out; - } - -private: - using TokenRange = llvm::ArrayRef; - // Given that the ranges we insert are disjoint, there are several ways to - // legally define range < range. - // We choose to define it so overlapping ranges compare equal. - static bool rangeLess(llvm::ArrayRef L, llvm::ArrayRef R) { - return L.end() <= R.begin(); - } - - // Disjoint sorted unclaimed ranges of expanded tokens. 
- std::set, decltype(&rangeLess)> UnclaimedRanges; -}; - -// Sentinel value for the selectedness of a node where we've seen no tokens yet. -// This resolves to Unselected if no tokens are ever seen. -// But Unselected + Complete -> Partial, while NoTokens + Complete --> Complete. -// This value is never exposed publicly. -constexpr SelectionTree::Selection NoTokens = - static_cast( - static_cast(SelectionTree::Complete + 1)); - -// Nodes start with NoTokens, and then use this function to aggregate the -// selectedness as more tokens are found. -void update(SelectionTree::Selection &Result, SelectionTree::Selection New) { - if (New == NoTokens) - return; - if (Result == NoTokens) - Result = New; - else if (Result != New) - // Can only be completely selected (or unselected) if all tokens are. - Result = SelectionTree::Partial; -} - - -// SelectionTester can determine whether a range of tokens from the PP-expanded -// stream (corresponding to an AST node) is considered selected. -// -// When the tokens result from macro expansions, the appropriate tokens in the -// main file are examined (macro invocation or args). Similarly for #includes. -// -// It tests each token in the range (not just the endpoints) as contiguous -// expanded tokens may not have contiguous spellings (with macros). -// -// Non-token text, and tokens not modeled in the AST (comments, semicolons) -// are ignored when determining selectedness. -class SelectionTester { +// Identifies which tokens are selected, and evaluates claims of source ranges +// by AST nodes. Tokens may be claimed only once: first-come, first-served. +class SelectedTokens { public: - // The selection is offsets [SelBegin, SelEnd) in SelFile. - SelectionTester(const syntax::TokenBuffer &Buf, FileID SelFile, - unsigned SelBegin, unsigned SelEnd, const SourceManager &SM) - : SelFile(SelFile), SM(SM) { - // Find all tokens (partially) selected in the file. 
- auto AllSpelledTokens = Buf.spelledTokens(SelFile); - const syntax::Token *SelFirst = - llvm::partition_point(AllSpelledTokens, [&](const syntax::Token &Tok) { - return SM.getFileOffset(Tok.endLocation()) <= SelBegin; - }); - const syntax::Token *SelLimit = std::partition_point( - SelFirst, AllSpelledTokens.end(), [&](const syntax::Token &Tok) { - return SM.getFileOffset(Tok.location()) < SelEnd; - }); - // Precompute selectedness and offset for selected spelled tokens. - for (const syntax::Token *T = SelFirst; T < SelLimit; ++T) { + SelectedTokens(llvm::ArrayRef Spelled, const SourceManager &SM, + unsigned SelBegin, unsigned SelEnd) + : SelBegin(SelBegin), SelEnd(SelEnd) { + // Extract bounds and selected-ness for all tokens spelled in the file. + Tokens.reserve(Spelled.size()); + for (const auto& Tok : Spelled) { // As well as comments, don't count semicolons as real tokens. // They're not properly claimed as expr-statement is missing from the AST. - if (T->kind() == tok::comment || T->kind() == tok::semi) + if (Tok.kind() == tok::comment || Tok.kind() == tok::semi) continue; - SpelledTokens.emplace_back(); - Tok &S = SpelledTokens.back(); - S.Offset = SM.getFileOffset(T->location()); - if (S.Offset >= SelBegin && S.Offset + T->length() <= SelEnd) + + Tokens.emplace_back(); + TokInfo &S = Tokens.back(); + S.StartOffset = SM.getFileOffset(Tok.location()); + S.EndOffset = S.StartOffset + Tok.length(); + if (S.StartOffset >= SelBegin && S.EndOffset <= SelEnd) S.Selected = SelectionTree::Complete; - else + else if (S.EndOffset > SelBegin && S.StartOffset < SelEnd) S.Selected = SelectionTree::Partial; + else + S.Selected = SelectionTree::Unselected; + S.Claimed = false; } } - // Test whether a consecutive range of tokens is selected. - // The tokens are taken from the expanded token stream. 
- SelectionTree::Selection - test(llvm::ArrayRef ExpandedTokens) const { - if (SpelledTokens.empty()) - return NoTokens; - SelectionTree::Selection Result = NoTokens; - while (!ExpandedTokens.empty()) { - // Take consecutive tokens from the same context together for efficiency. - FileID FID = SM.getFileID(ExpandedTokens.front().location()); - auto Batch = ExpandedTokens.take_while([&](const syntax::Token &T) { - return SM.getFileID(T.location()) == FID; - }); - assert(!Batch.empty()); - ExpandedTokens = ExpandedTokens.drop_front(Batch.size()); - - update(Result, testChunk(FID, Batch)); - } - return Result; - } - - // Cheap check whether any of the tokens in R might be selected. - // If it returns false, test() will return NoTokens or Unselected. - // If it returns true, test() may return any value. - bool mayHit(SourceRange R) const { - if (SpelledTokens.empty()) - return false; - auto B = SM.getDecomposedLoc(R.getBegin()); - auto E = SM.getDecomposedLoc(R.getEnd()); - if (B.first == SelFile && E.first == SelFile) - if (E.second < SpelledTokens.front().Offset || - B.second > SpelledTokens.back().Offset) - return false; - return true; - } - -private: - // Hit-test a consecutive range of tokens from a single file ID. - SelectionTree::Selection - testChunk(FileID FID, llvm::ArrayRef Batch) const { - assert(!Batch.empty()); - SourceLocation StartLoc = Batch.front().location(); - // There are several possible categories of FileID depending on how the - // preprocessor was used to generate these tokens: - // main file, #included file, macro args, macro bodies. - // We need to identify the main-file tokens that represent Batch, and - // determine whether we want to exclusively claim them. Regular tokens - // represent one AST construct, but a macro invocation can represent many. - - // Handle tokens written directly in the main file. 
- if (FID == SelFile) { - return testTokenRange(SM.getFileOffset(Batch.front().location()), - SM.getFileOffset(Batch.back().location())); - } + // Associates any tokens overlapping [Begin, End) with an AST node. + // Tokens that were already claimed by another AST node are not claimed again. + // Updates Result if the node is selected in the sense of SelectionTree. + void claim(unsigned Begin, unsigned End, SelectionTree::Selection &Result) { + assert(Begin <= End); - // Handle tokens in another file #included into the main file. - // Check if the #include is selected, but don't claim it exclusively. - if (StartLoc.isFileID()) { - for (SourceLocation Loc = Batch.front().location(); Loc.isValid(); - Loc = SM.getIncludeLoc(SM.getFileID(Loc))) { - if (SM.getFileID(Loc) == SelFile) - // FIXME: use whole #include directive, not just the filename string. - return testToken(SM.getFileOffset(Loc)); + // Fast-path for missing the selection entirely. + if (Begin >= SelEnd || End <= SelBegin) + return; + + // We will consider the range (at least partially) selected if it hit any + // selected and previously unclaimed token. + bool ClaimedAnyToken = false; + // The selection is (at most) partial if: + // - any claimed token is partially selected + // - any token in the range is unselected + bool PartialSelection = false; + + // Find the first token that (maybe) overlaps the claimed range. + auto Start = llvm::partition_point(Tokens, [&](const TokInfo &Tok) { + return Tok.EndOffset <= Begin; + }); + // Iterate over every token that overlaps the range. + // Claim selected tokens, and update the two result flags. + for (auto It = Start; It != Tokens.end() && It->StartOffset < End; ++It) { + if (It->Selected) { + if (!It->Claimed) { + // Token is selected, in the node's range, and unclaimed; claim it. + It->Claimed = true; + ClaimedAnyToken = true; + // If the token was only partially selected, so is the node. 
+ PartialSelection |= (It->Selected == SelectionTree::Partial); + } + } else { + // If the node covers an unselected token, it's not completely selected. + PartialSelection = true; } - return NoTokens; } - assert(StartLoc.isMacroID()); - // Handle tokens that were passed as a macro argument. - SourceLocation ArgStart = SM.getTopMacroCallerLoc(StartLoc); - if (SM.getFileID(ArgStart) == SelFile) { - SourceLocation ArgEnd = SM.getTopMacroCallerLoc(Batch.back().location()); - return testTokenRange(SM.getFileOffset(ArgStart), - SM.getFileOffset(ArgEnd)); + // If some tokens were previously claimed (Result != Unselected), we may + // upgrade from Partial->Complete, even if no new tokens were claimed. + // Important for [[int a]]. + if (ClaimedAnyToken || Result) { + Result = std::max(Result, PartialSelection ? SelectionTree::Partial + : SelectionTree::Complete); } - - // Handle tokens produced by non-argument macro expansion. - // Check if the macro name is selected, don't claim it exclusively. - auto Expansion = SM.getDecomposedExpansionLoc(StartLoc); - if (Expansion.first == SelFile) - // FIXME: also check ( and ) for function-like macros? - return testToken(Expansion.second); - else - return NoTokens; - } - - // Is the closed token range [Begin, End] selected? - SelectionTree::Selection testTokenRange(unsigned Begin, unsigned End) const { - assert(Begin <= End); - // Outside the selection entirely? - if (End < SpelledTokens.front().Offset || - Begin > SpelledTokens.back().Offset) - return SelectionTree::Unselected; - - // Compute range of tokens. - auto B = llvm::partition_point( - SpelledTokens, [&](const Tok &T) { return T.Offset < Begin; }); - auto E = std::partition_point( - B, SpelledTokens.end(), [&](const Tok &T) { return T.Offset <= End; }); - - // Aggregate selectedness of tokens in range. 
- bool ExtendsOutsideSelection = Begin < SpelledTokens.front().Offset || - End > SpelledTokens.back().Offset; - SelectionTree::Selection Result = - ExtendsOutsideSelection ? SelectionTree::Unselected : NoTokens; - for (auto It = B; It != E; ++It) - update(Result, It->Selected); - return Result; - } - - // Is the token at `Offset` selected? - SelectionTree::Selection testToken(unsigned Offset) const { - // Outside the selection entirely? - if (Offset < SpelledTokens.front().Offset || - Offset > SpelledTokens.back().Offset) - return SelectionTree::Unselected; - // Find the token, if it exists. - auto It = llvm::partition_point( - SpelledTokens, [&](const Tok &T) { return T.Offset < Offset; }); - if (It != SpelledTokens.end() && It->Offset == Offset) - return It->Selected; - return NoTokens; } - struct Tok { - unsigned Offset; +private: + struct TokInfo { + unsigned StartOffset; + unsigned EndOffset; SelectionTree::Selection Selected; + bool Claimed; + bool operator<(const TokInfo &Other) const { + return StartOffset < Other.StartOffset; + } }; - std::vector SpelledTokens; - FileID SelFile; - const SourceManager &SM; + std::vector Tokens; + unsigned SelBegin, SelEnd; }; // Show the type of a node for debugging. @@ -383,6 +195,16 @@ class SelectionVisitor : public RecursiveASTVisitor { V.TraverseAST(AST); assert(V.Stack.size() == 1 && "Unpaired push/pop?"); assert(V.Stack.top() == &V.Nodes.front()); + // We selected TUDecl if tokens were unclaimed (or the file is empty). + SelectionTree::Selection UnclaimedTokens = SelectionTree::Unselected; + V.Claimed.claim(Begin, End, UnclaimedTokens); + if (UnclaimedTokens || V.Nodes.size() == 1) { + StringRef FileContent = AST.getSourceManager().getBufferData(File); + // Don't require the trailing newlines to be selected. + bool SelectedAll = Begin == 0 && End >= FileContent.rtrim().size(); + V.Stack.top()->Selected = + SelectedAll ? 
SelectionTree::Complete : SelectionTree::Partial; + } return std::move(V.Nodes); } @@ -467,8 +289,11 @@ class SelectionVisitor : public RecursiveASTVisitor { #ifndef NDEBUG PrintPolicy(PP), #endif - TokenBuf(Tokens), SelChecker(Tokens, SelFile, SelBegin, SelEnd, SM), - UnclaimedExpandedTokens(Tokens.expandedTokens()) { + Claimed(Tokens.spelledTokens(SelFile), SM, SelBegin, SelEnd), + SelFile(SelFile), + SelBeginTokenStart(SM.getFileOffset(Lexer::GetBeginningOfToken( + SM.getComposedLoc(SelFile, SelBegin), SM, LangOpts))), + SelEnd(SelEnd) { // Ensure we have a node for the TU decl, regardless of traversal scope. Nodes.emplace_back(); Nodes.back().ASTNode = DynTypedNode::create(*AST.getTranslationUnitDecl()); @@ -521,12 +346,18 @@ class SelectionVisitor : public RecursiveASTVisitor { // don't intersect the selection may be recursively skipped. bool canSafelySkipNode(const DynTypedNode &N) { SourceRange S = N.getSourceRange(); - if (!SelChecker.mayHit(S)) { - dlog("{1}skip: {0}", printNodeToString(N, PrintPolicy), indent()); - dlog("{1}skipped range = {0}", S.printToString(SM), indent(1)); - return true; - } - return false; + auto B = SM.getDecomposedLoc(S.getBegin()); + auto E = SM.getDecomposedLoc(S.getEnd()); + // Node lies in a macro expansion? + if (B.first != SelFile || E.first != SelFile) + return false; + // Node intersects selection tokens? + if (B.second < SelEnd && E.second >= SelBeginTokenStart) + return false; + // Otherwise, allow skipping over the node. 
+ dlog("{1}skip: {0}", printNodeToString(N, PrintPolicy), indent()); + dlog("{1}skipped range = {0}", S.printToString(SM), indent(1)); + return true; } // There are certain nodes we want to treat as leaves in the SelectionTree, @@ -546,9 +377,11 @@ class SelectionVisitor : public RecursiveASTVisitor { Nodes.emplace_back(); Nodes.back().ASTNode = std::move(Node); Nodes.back().Parent = Stack.top(); - Nodes.back().Selected = NoTokens; Stack.push(&Nodes.back()); claimRange(Early, Nodes.back().Selected); + // Early hit detection never selects the whole node. + if (Nodes.back().Selected) + Nodes.back().Selected = SelectionTree::Partial; } // Pops a node off the ancestor stack, and finalizes it. Pairs with push(). @@ -557,8 +390,6 @@ class SelectionVisitor : public RecursiveASTVisitor { Node &N = *Stack.top(); dlog("{1}pop: {0}", printNodeToString(N.ASTNode, PrintPolicy), indent(-1)); claimRange(N.ASTNode.getSourceRange(), N.Selected); - if (N.Selected == NoTokens) - N.Selected = SelectionTree::Unselected; if (N.Selected || !N.Children.empty()) { // Attach to the tree. N.Parent->Children.push_back(&N); @@ -593,12 +424,31 @@ class SelectionVisitor : public RecursiveASTVisitor { // This is usually called from pop(), so we can take children into account. // The existing state of Result is relevant (early/late claims can interact). void claimRange(SourceRange S, SelectionTree::Selection &Result) { - for (const auto &ClaimedRange : - UnclaimedExpandedTokens.erase(TokenBuf.expandedTokens(S))) - update(Result, SelChecker.test(ClaimedRange)); - - if (Result && Result != NoTokens) - dlog("{1}hit selection: {0}", S.printToString(SM), indent()); + if (!S.isValid()) + return; + // toHalfOpenFileRange() allows selection of constructs in macro args. e.g: + // #define LOOP_FOREVER(Body) for(;;) { Body } + // void IncrementLots(int &x) { + // LOOP_FOREVER( ++x; ) + // } + // Selecting "++x" or "x" will do the right thing. 
+ auto Range = toHalfOpenFileRange(SM, LangOpts, S); + assert(Range && "We should be able to get the File Range"); + dlog("{1}claimRange: {0}", Range->printToString(SM), indent()); + auto B = SM.getDecomposedLoc(Range->getBegin()); + auto E = SM.getDecomposedLoc(Range->getEnd()); + // Otherwise, nodes in macro expansions can't be selected. + if (B.first != SelFile || E.first != SelFile) + return; + // Attempt to claim the remaining range. If there's nothing to claim, only + // children were selected. + Claimed.claim(B.second, E.second, Result); + if (Result) + dlog("{1}hit selection: {0}", + SourceRange(SM.getComposedLoc(B.first, B.second), + SM.getComposedLoc(E.first, E.second)) + .printToString(SM), + indent()); } std::string indent(int Offset = 0) { @@ -613,11 +463,17 @@ class SelectionVisitor : public RecursiveASTVisitor { #ifndef NDEBUG const PrintingPolicy &PrintPolicy; #endif - const syntax::TokenBuffer &TokenBuf; std::stack Stack; - SelectionTester SelChecker; - IntervalSet UnclaimedExpandedTokens; + SelectedTokens Claimed; std::deque Nodes; // Stable pointers as we add more nodes. + FileID SelFile; + // If the selection start slices a token in half, the beginning of that token. + // This is useful for checking whether the end of a token range overlaps + // the selection: range.end < SelBeginTokenStart is equivalent to + // range.end + measureToken(range.end) < SelBegin (assuming range.end points + // to a token), and it saves a lex every time. + unsigned SelBeginTokenStart; + unsigned SelEnd; }; } // namespace diff --git a/clang-tools-extra/clangd/Selection.h b/clang-tools-extra/clangd/Selection.h index a7050c49be6ba..9bcb9d5fb01f0 100644 --- a/clang-tools-extra/clangd/Selection.h +++ b/clang-tools-extra/clangd/Selection.h @@ -76,7 +76,7 @@ class SelectionTree { unsigned Start, unsigned End); // Describes to what extent an AST node is covered by the selection. 
- enum Selection : unsigned char { + enum Selection { // The AST node owns no characters covered by the selection. // Note that characters owned by children don't count: // if (x == 0) scream(); diff --git a/clang-tools-extra/clangd/unittests/SelectionTests.cpp b/clang-tools-extra/clangd/unittests/SelectionTests.cpp index ec9fd4185d943..6f4ccd88b978e 100644 --- a/clang-tools-extra/clangd/unittests/SelectionTests.cpp +++ b/clang-tools-extra/clangd/unittests/SelectionTests.cpp @@ -134,15 +134,6 @@ TEST(SelectionTest, CommonAncestor) { )cpp", "IfStmt", }, - { - R"cpp( - int x(int); - #define M(foo) x(foo) - int a = 42; - int b = M([[^a]]); - )cpp", - "DeclRefExpr", - }, { R"cpp( void foo(); @@ -387,7 +378,6 @@ TEST(SelectionTest, Selected) { $C[[return]]; }]] else [[{^ }]]]] - char z; } )cpp", R"cpp( @@ -396,10 +386,10 @@ TEST(SelectionTest, Selected) { void foo(^$C[[unique_ptr<$C[[unique_ptr<$C[[int]]>]]>]]^ a) {} )cpp", R"cpp(int a = [[5 >^> 1]];)cpp", - R"cpp( + R"cpp([[ #define ECHO(X) X - ECHO(EC^HO($C[[int]]) EC^HO(a)); - )cpp", + ECHO(EC^HO([[$C[[int]]) EC^HO(a]])); + ]])cpp", R"cpp( $C[[^$C[[int]] a^]]; )cpp", R"cpp( $C[[^$C[[int]] a = $C[[5]]^]]; )cpp", }; @@ -438,56 +428,6 @@ TEST(SelectionTest, PathologicalPreprocessor) { EXPECT_EQ("WhileStmt", T.commonAncestor()->Parent->kind()); } -TEST(SelectionTest, IncludedFile) { - const char *Case = R"cpp( - void test() { -#include "Exp^and.inc" - break; - } - )cpp"; - Annotations Test(Case); - auto TU = TestTU::withCode(Test.code()); - TU.AdditionalFiles["Expand.inc"] = "while(1)\n"; - auto AST = TU.build(); - auto T = makeSelectionTree(Case, AST); - - EXPECT_EQ("WhileStmt", T.commonAncestor()->kind()); -} - -TEST(SelectionTest, MacroArgExpansion) { - // If a macro arg is expanded several times, we consider them all selected. 
- const char *Case = R"cpp( - int mul(int, int); - #define SQUARE(X) mul(X, X); - int nine = SQUARE(^3); - )cpp"; - Annotations Test(Case); - auto AST = TestTU::withCode(Test.code()).build(); - auto T = makeSelectionTree(Case, AST); - // Unfortunately, this makes the common ancestor the CallExpr... - // FIXME: hack around this by picking one? - EXPECT_EQ("CallExpr", T.commonAncestor()->kind()); - EXPECT_FALSE(T.commonAncestor()->Selected); - EXPECT_EQ(2u, T.commonAncestor()->Children.size()); - for (const auto* N : T.commonAncestor()->Children) { - EXPECT_EQ("IntegerLiteral", N->kind()); - EXPECT_TRUE(N->Selected); - } - - // Verify that the common assert() macro doesn't suffer from this. - // (This is because we don't associate the stringified token with the arg). - Case = R"cpp( - void die(const char*); - #define assert(x) (x ? (void)0 : die(#x) - void foo() { assert(^42); } - )cpp"; - Test = Annotations(Case); - AST = TestTU::withCode(Test.code()).build(); - T = makeSelectionTree(Case, AST); - - EXPECT_EQ("IntegerLiteral", T.commonAncestor()->kind()); -} - TEST(SelectionTest, Implicit) { const char* Test = R"cpp( struct S { S(const char*); }; diff --git a/clang-tools-extra/clangd/unittests/TweakTests.cpp b/clang-tools-extra/clangd/unittests/TweakTests.cpp index dc76999040195..4e481241acd8c 100644 --- a/clang-tools-extra/clangd/unittests/TweakTests.cpp +++ b/clang-tools-extra/clangd/unittests/TweakTests.cpp @@ -269,7 +269,7 @@ TEST_F(ExtractVariableTest, Test) { EXPECT_UNAVAILABLE(UnavailableCases); // vector of pairs of input and output strings - const std::vector> + const std::vector> InputOutputs = { // extraction from variable declaration/assignment {R"cpp(void varDecl() { @@ -321,10 +321,17 @@ TEST_F(ExtractVariableTest, Test) { if(1) LOOP(5 + [[3]]) })cpp", + /*FIXME: It should be extracted like this. SelectionTree needs to be + * fixed for macros. 
R"cpp(#define LOOP(x) while (1) {a = x;} + void f(int a) { + auto dummy = 3; if(1) + LOOP(5 + dummy) + })cpp"},*/ + R"cpp(#define LOOP(x) while (1) {a = x;} void f(int a) { - auto dummy = 3; if(1) - LOOP(5 + dummy) + auto dummy = LOOP(5 + 3); if(1) + dummy })cpp"}, {R"cpp(#define LOOP(x) do {x;} while(1); void f(int a) { @@ -637,18 +644,13 @@ void f(const int c) { )cpp"; EXPECT_EQ(apply(TemplateFailInput), "unavailable"); - std::string MacroInput = R"cpp( + // FIXME: This should be extractable after selectionTree works correctly for + // macros (currently it doesn't select anything for the following case) + std::string MacroFailInput = R"cpp( #define F(BODY) void f() { BODY } F ([[int x = 0;]]) )cpp"; - std::string MacroOutput = R"cpp( - #define F(BODY) void f() { BODY } - void extracted() { -int x = 0; -} -F (extracted();) - )cpp"; - EXPECT_EQ(apply(MacroInput), MacroOutput); + EXPECT_EQ(apply(MacroFailInput), "unavailable"); // Shouldn't crash. EXPECT_EQ(apply("void f([[int a]]);"), "unavailable"); diff --git a/clang/include/clang/Tooling/Syntax/Tokens.h b/clang/include/clang/Tooling/Syntax/Tokens.h index 6f4d0e0c050af..301432d3888b3 100644 --- a/clang/include/clang/Tooling/Syntax/Tokens.h +++ b/clang/include/clang/Tooling/Syntax/Tokens.h @@ -175,7 +175,6 @@ class TokenBuffer { /// All tokens produced by the preprocessor after all macro replacements, /// directives, etc. Source locations found in the clang AST will always /// point to one of these tokens. - /// Tokens are in TU order (per SourceManager::isBeforeInTranslationUnit()). /// FIXME: figure out how to handle token splitting, e.g. '>>' can be split /// into two '>' tokens by the parser. However, TokenBuffer currently /// keeps it as a single '>>' token. @@ -183,10 +182,6 @@ class TokenBuffer { return ExpandedTokens; } - /// Returns the subrange of expandedTokens() corresponding to the closed - /// token range R. 
- llvm::ArrayRef expandedTokens(SourceRange R) const; - /// Find the subrange of spelled tokens that produced the corresponding \p /// Expanded tokens. /// diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp index 5941507e086d2..a2c3bc137d6ba 100644 --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -119,22 +119,6 @@ llvm::StringRef FileRange::text(const SourceManager &SM) const { return Text.substr(Begin, length()); } -llvm::ArrayRef TokenBuffer::expandedTokens(SourceRange R) const { - if (R.isInvalid()) - return {}; - const Token *Begin = - llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) { - return SourceMgr->isBeforeInTranslationUnit(T.location(), R.getBegin()); - }); - const Token *End = - llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) { - return !SourceMgr->isBeforeInTranslationUnit(R.getEnd(), T.location()); - }); - if (Begin > End) - return {}; - return {Begin, End}; -} - std::pair TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const { assert(Expanded); diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp index 2c462d49ee410..6ffe2c43dd0ff 100644 --- a/clang/unittests/Tooling/Syntax/TokensTest.cpp +++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp @@ -40,7 +40,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Testing/Support/Annotations.h" #include "llvm/Testing/Support/SupportHelpers.h" -#include "gmock/gmock.h" #include #include #include @@ -664,20 +663,6 @@ TEST_F(TokenBufferTest, SpelledByExpanded) { ValueIs(SameRange(findSpelled("not_mapped")))); } -TEST_F(TokenBufferTest, ExpandedTokensForRange) { - recordTokens(R"cpp( - #define SIGN(X) X##_washere - A SIGN(B) C SIGN(D) E SIGN(F) G - )cpp"); - - SourceRange R(findExpanded("C").front().location(), - findExpanded("F_washere").front().location()); - // Sanity check: expanded and spelled tokens are stored 
separately. - EXPECT_THAT(Buffer.expandedTokens(R), - SameRange(findExpanded("C D_washere E F_washere"))); - EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty()); -} - TEST_F(TokenBufferTest, ExpansionStartingAt) { // Object-like macro expansions. recordTokens(R"cpp( From cee62e6fcff6e833bedca1554f6df8f8234e9b13 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Sat, 30 Nov 2019 13:23:49 +0100 Subject: [PATCH 235/591] Fix a typo. --- llvm/lib/CodeGen/CodeGenPrepare.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 89f69bdf37e97..0689f8e4f0c30 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -1054,7 +1054,7 @@ bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) { // Collect all the relocate calls associated with a statepoint AllRelocateCalls.push_back(Relocate); - // We need atleast one base pointer relocation + one derived pointer + // We need at least one base pointer relocation + one derived pointer // relocation to mangle if (AllRelocateCalls.size() < 2) return false; From b094258661e0064133679b8b51e981eefda07ec7 Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Sat, 30 Nov 2019 13:31:16 +0100 Subject: [PATCH 236/591] Updated the OCaml/bitwriter.ml test for OCaml 4.06+ Since OCaml 4.02 (released in 2014), strings and bytes are different types, but up until OCaml 4.06, the compiler defaulted to a compatibility mode "unsafe-string". OCaml 4.06 flips the default to "safe-string", breaking the test. This change should be compatible with OCaml 4.02+, but is only truly necessary for OCaml 4.06+. 
For more information, see: https://caml.inria.fr/pub/docs/manual-ocaml/libref/String.html https://ocaml.org/releases/4.02.html --- llvm/test/Bindings/OCaml/bitwriter.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/Bindings/OCaml/bitwriter.ml b/llvm/test/Bindings/OCaml/bitwriter.ml index 28a61fee91b1b..17111bd3b51e0 100644 --- a/llvm/test/Bindings/OCaml/bitwriter.ml +++ b/llvm/test/Bindings/OCaml/bitwriter.ml @@ -17,7 +17,7 @@ let test x = if not x then exit 1 else () let read_file name = let ic = open_in_bin name in let len = in_channel_length ic in - let buf = String.create len in + let buf = Bytes.create len in test ((input ic buf 0 len) = len); @@ -46,4 +46,4 @@ let _ = test (file_buf = temp_bitcode m); test (file_buf = temp_bitcode ~unbuffered:false m); test (file_buf = temp_bitcode ~unbuffered:true m); - test (file_buf = Llvm.MemoryBuffer.as_string (Llvm_bitwriter.write_bitcode_to_memory_buffer m)) + test (file_buf = Bytes.of_string (Llvm.MemoryBuffer.as_string (Llvm_bitwriter.write_bitcode_to_memory_buffer m))) From c2443155a0fb245c8f17f2c1c72b6ea391e86e81 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Sat, 30 Nov 2019 14:20:11 +0100 Subject: [PATCH 237/591] Revert 651f07908a1 "[AArch64] Don't combine callee-save and local stack adjustment when optimizing for size" This caused asserts (and perhaps also miscompiles) while building for Windows on AArch64. See the discussion on D68530 for details and reproducer. Reverting until this can be investigated and fixed. > For arm64, D18619 introduced the ability to combine bumping the stack pointer > upfront in case it needs to be bumped for both the callee-save area as well as > the local stack area. > > That diff already remarks that "This change can cause an increase in > instructions", but argues that even when that happens, it should be still be a > performance benefit because the number of micro-ops is reduced. 
> > We have observed that this code-size increase can be significant in practice. > This diff disables combining stack bumping for methods that are marked as > optimize-for-size. > > Example of a prologue with the behavior before this diff (combining stack bumping when possible): > sub sp, sp, #0x40 > stp d9, d8, [sp, #0x10] > stp x20, x19, [sp, #0x20] > stp x29, x30, [sp, #0x30] > add x29, sp, #0x30 > [... compute x8 somehow ...] > stp x0, x8, [sp] > > And after this diff, if the method is marked as optimize-for-size: > stp d9, d8, [sp, #-0x30]! > stp x20, x19, [sp, #0x10] > stp x29, x30, [sp, #0x20] > add x29, sp, #0x20 > [... compute x8 somehow ...] > stp x0, x8, [sp, #-0x10]! > > Note that without combining the stack bump there are two auto-decrements, > nicely folded into the stp instructions, whereas otherwise there is a single > sub sp, ... instruction, but not folded. > > Patch by Nikolai Tillmann! > > Differential Revision: https://reviews.llvm.org/D68530 --- .../Target/AArch64/AArch64FrameLowering.cpp | 3 --- ...r-combine-csr-local-stack-bump-for-size.ll | 25 ------------------- 2 files changed, 28 deletions(-) delete mode 100644 llvm/test/CodeGen/AArch64/arm64-never-combine-csr-local-stack-bump-for-size.ll diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index eca9b1e75c2ac..8f88198203d74 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -452,9 +452,6 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( const AArch64Subtarget &Subtarget = MF.getSubtarget(); const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); - if (MF.getFunction().hasOptSize()) - return false; - if (AFI->getLocalStackSize() == 0) return false; diff --git a/llvm/test/CodeGen/AArch64/arm64-never-combine-csr-local-stack-bump-for-size.ll b/llvm/test/CodeGen/AArch64/arm64-never-combine-csr-local-stack-bump-for-size.ll deleted file mode 
100644 index 273fb31e16c3b..0000000000000 --- a/llvm/test/CodeGen/AArch64/arm64-never-combine-csr-local-stack-bump-for-size.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-post-ra | FileCheck %s - -; CHECK-LABEL: main: -; CHECK: stp x29, x30, [sp, #-16]! -; CHECK-NEXT: stp xzr, xzr, [sp, #-16]! -; CHECK: adrp x0, l_.str@PAGE -; CHECK: add x0, x0, l_.str@PAGEOFF -; CHECK-NEXT: bl _puts -; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ldp x29, x30, [sp], #16 -; CHECK-NEXT: ret - -@.str = private unnamed_addr constant [7 x i8] c"hello\0A\00" - -define i32 @main() nounwind ssp optsize { -entry: - %local1 = alloca i64, align 8 - %local2 = alloca i64, align 8 - store i64 0, i64* %local1 - store i64 0, i64* %local2 - %call = call i32 @puts(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0)) - ret i32 %call -} - -declare i32 @puts(i8*) From 3c7f6b439699a9cbbc0ac8d288cc70aff357446b Mon Sep 17 00:00:00 2001 From: Tyker Date: Sat, 30 Nov 2019 16:42:33 +0100 Subject: [PATCH 238/591] [clang][modules] Add support for merging lifetime-extended temporaries Summary: Add support for merging lifetime-extended temporaries Reviewers: rsmith Reviewed By: rsmith Subscribers: xbolva00, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D70190 --- clang/include/clang/AST/DeclCXX.h | 4 ++- clang/include/clang/AST/TextNodeDumper.h | 2 ++ clang/include/clang/Serialization/ASTReader.h | 8 ++++++ clang/lib/AST/TextNodeDumper.cpp | 11 ++++++++ clang/lib/Serialization/ASTReaderDecl.cpp | 26 +++++++++++++++++++ .../merge-lifetime-extended-temporary/a.h | 2 ++ .../merge-lifetime-extended-temporary/b.h | 4 +++ .../merge-lifetime-extended-temporary/c.h | 4 +++ .../module.modulemap | 14 ++++++++++ .../merge-lifetime-extended-temporary.cpp | 14 ++++++++++ 10 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h create mode 100644 
clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h create mode 100644 clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h create mode 100644 clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap create mode 100644 clang/test/Modules/merge-lifetime-extended-temporary.cpp diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h index 63d67bd3f55b2..0f2018fb9e8cb 100644 --- a/clang/include/clang/AST/DeclCXX.h +++ b/clang/include/clang/AST/DeclCXX.h @@ -3041,7 +3041,9 @@ class NamespaceAliasDecl : public NamedDecl, /// Implicit declaration of a temporary that was materialized by /// a MaterializeTemporaryExpr and lifetime-extended by a declaration -class LifetimeExtendedTemporaryDecl final : public Decl { +class LifetimeExtendedTemporaryDecl final + : public Decl, + public Mergeable { friend class MaterializeTemporaryExpr; friend class ASTDeclReader; diff --git a/clang/include/clang/AST/TextNodeDumper.h b/clang/include/clang/AST/TextNodeDumper.h index 0ff5a614a864d..d293ea190aa43 100644 --- a/clang/include/clang/AST/TextNodeDumper.h +++ b/clang/include/clang/AST/TextNodeDumper.h @@ -346,6 +346,8 @@ class TextNodeDumper void VisitObjCPropertyImplDecl(const ObjCPropertyImplDecl *D); void VisitBlockDecl(const BlockDecl *D); void VisitConceptDecl(const ConceptDecl *D); + void + VisitLifetimeExtendedTemporaryDecl(const LifetimeExtendedTemporaryDecl *D); }; } // namespace clang diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index f0b5e99338232..b6dae68b3413b 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -551,6 +551,14 @@ class ASTReader llvm::DenseMap> AnonymousDeclarationsForMerging; + /// Key used to identify LifetimeExtendedTemporaryDecl for merging, + /// containing the lifetime-extending declaration and the mangling number. 
+ using LETemporaryKey = std::pair; + + /// Map of already deserialiazed temporaries. + llvm::DenseMap + LETemporaryForMerging; + struct FileDeclsInfo { ModuleFile *Mod = nullptr; ArrayRef Decls; diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index 0ff95213118fd..561c76a45cbc2 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -1338,6 +1338,17 @@ void TextNodeDumper::VisitFunctionDecl(const FunctionDecl *D) { OS << " <<getNumParams() << ">>>"; } +void TextNodeDumper::VisitLifetimeExtendedTemporaryDecl( + const LifetimeExtendedTemporaryDecl *D) { + OS << " extended by "; + dumpBareDeclRef(D->getExtendingDecl()); + OS << " mangling "; + { + ColorScope Color(OS, ShowColors, ValueColor); + OS << D->getManglingNumber(); + } +} + void TextNodeDumper::VisitFieldDecl(const FieldDecl *D) { dumpName(D); dumpType(D->getType()); diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 8991a39a70679..d6c57757cf8c8 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -424,6 +424,9 @@ namespace clang { template void mergeMergeable(Mergeable *D); + template <> + void mergeMergeable(Mergeable *D); + void mergeTemplatePattern(RedeclarableTemplateDecl *D, RedeclarableTemplateDecl *Existing, DeclID DsID, bool IsKeyDecl); @@ -2358,6 +2361,7 @@ void ASTDeclReader::VisitLifetimeExtendedTemporaryDecl( if (Record.readInt()) D->Value = new (D->getASTContext()) APValue(Record.readAPValue()); D->ManglingNumber = Record.readInt(); + mergeMergeable(D); } std::pair @@ -2555,6 +2559,28 @@ static bool allowODRLikeMergeInC(NamedDecl *ND) { return false; } +/// Attempts to merge LifetimeExtendedTemporaryDecl with +/// identical class definitions from two different modules. +template<> +void ASTDeclReader::mergeMergeable( + Mergeable *D) { + // If modules are not available, there is no reason to perform this merge. 
+ if (!Reader.getContext().getLangOpts().Modules) + return; + + LifetimeExtendedTemporaryDecl *LETDecl = + static_cast(D); + + LifetimeExtendedTemporaryDecl *&LookupResult = + Reader.LETemporaryForMerging[std::make_pair( + LETDecl->getExtendingDecl(), LETDecl->getManglingNumber())]; + if (LookupResult) + Reader.getContext().setPrimaryMergedDecl(LETDecl, + LookupResult->getCanonicalDecl()); + else + LookupResult = LETDecl; +} + /// Attempts to merge the given declaration (D) with another declaration /// of the same entity, for the case where the entity is not actually /// redeclarable. This happens, for instance, when merging the fields of diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h new file mode 100644 index 0000000000000..8adab29eafc76 --- /dev/null +++ b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h @@ -0,0 +1,2 @@ + +constexpr const int& LETemp = 0; diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h new file mode 100644 index 0000000000000..2bd1b096d6073 --- /dev/null +++ b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h @@ -0,0 +1,4 @@ + +#include "a.h" + +constexpr const int* PtrTemp1 = &LETemp; diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h new file mode 100644 index 0000000000000..b023eebca49c2 --- /dev/null +++ b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h @@ -0,0 +1,4 @@ + +#include "a.h" + +constexpr const int* PtrTemp2 = &LETemp; diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap new file mode 100644 index 0000000000000..1339d627a44af --- /dev/null +++ 
b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap @@ -0,0 +1,14 @@ +module "a" { + export * + header "a.h" +} + +module "b" { + export * + header "b.h" +} + +module "c" { + export * + header "c.h" +} diff --git a/clang/test/Modules/merge-lifetime-extended-temporary.cpp b/clang/test/Modules/merge-lifetime-extended-temporary.cpp new file mode 100644 index 0000000000000..36db948b2c4ef --- /dev/null +++ b/clang/test/Modules/merge-lifetime-extended-temporary.cpp @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x c++ -I%S/Inputs/merge-lifetime-extended-temporary -verify -std=c++11 %s -DORDER=1 +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x c++ -I%S/Inputs/merge-lifetime-extended-temporary -verify -std=c++11 %s -DORDER=2 + +// expected-no-diagnostics +#if ORDER == 1 +#include "c.h" +#include "b.h" +#else +#include "b.h" +#include "c.h" +#endif + +static_assert(PtrTemp1 == &LETemp, ""); +static_assert(PtrTemp1 == PtrTemp2, ""); From 3f4b70c79e686117c2754d2c0a5a44c8b6829e79 Mon Sep 17 00:00:00 2001 From: Tyker Date: Sat, 30 Nov 2019 17:52:26 +0100 Subject: [PATCH 239/591] Revert "[clang][modules] Add support for merging lifetime-extended temporaries" This reverts commit 3c7f6b439699a9cbbc0ac8d288cc70aff357446b. 
--- clang/include/clang/AST/DeclCXX.h | 4 +-- clang/include/clang/AST/TextNodeDumper.h | 2 -- clang/include/clang/Serialization/ASTReader.h | 8 ------ clang/lib/AST/TextNodeDumper.cpp | 11 -------- clang/lib/Serialization/ASTReaderDecl.cpp | 26 ------------------- .../merge-lifetime-extended-temporary/a.h | 2 -- .../merge-lifetime-extended-temporary/b.h | 4 --- .../merge-lifetime-extended-temporary/c.h | 4 --- .../module.modulemap | 14 ---------- .../merge-lifetime-extended-temporary.cpp | 14 ---------- 10 files changed, 1 insertion(+), 88 deletions(-) delete mode 100644 clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h delete mode 100644 clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h delete mode 100644 clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h delete mode 100644 clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap delete mode 100644 clang/test/Modules/merge-lifetime-extended-temporary.cpp diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h index 0f2018fb9e8cb..63d67bd3f55b2 100644 --- a/clang/include/clang/AST/DeclCXX.h +++ b/clang/include/clang/AST/DeclCXX.h @@ -3041,9 +3041,7 @@ class NamespaceAliasDecl : public NamedDecl, /// Implicit declaration of a temporary that was materialized by /// a MaterializeTemporaryExpr and lifetime-extended by a declaration -class LifetimeExtendedTemporaryDecl final - : public Decl, - public Mergeable { +class LifetimeExtendedTemporaryDecl final : public Decl { friend class MaterializeTemporaryExpr; friend class ASTDeclReader; diff --git a/clang/include/clang/AST/TextNodeDumper.h b/clang/include/clang/AST/TextNodeDumper.h index d293ea190aa43..0ff5a614a864d 100644 --- a/clang/include/clang/AST/TextNodeDumper.h +++ b/clang/include/clang/AST/TextNodeDumper.h @@ -346,8 +346,6 @@ class TextNodeDumper void VisitObjCPropertyImplDecl(const ObjCPropertyImplDecl *D); void VisitBlockDecl(const BlockDecl *D); void VisitConceptDecl(const 
ConceptDecl *D); - void - VisitLifetimeExtendedTemporaryDecl(const LifetimeExtendedTemporaryDecl *D); }; } // namespace clang diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index b6dae68b3413b..f0b5e99338232 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -551,14 +551,6 @@ class ASTReader llvm::DenseMap> AnonymousDeclarationsForMerging; - /// Key used to identify LifetimeExtendedTemporaryDecl for merging, - /// containing the lifetime-extending declaration and the mangling number. - using LETemporaryKey = std::pair; - - /// Map of already deserialiazed temporaries. - llvm::DenseMap - LETemporaryForMerging; - struct FileDeclsInfo { ModuleFile *Mod = nullptr; ArrayRef Decls; diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index 561c76a45cbc2..0ff95213118fd 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -1338,17 +1338,6 @@ void TextNodeDumper::VisitFunctionDecl(const FunctionDecl *D) { OS << " <<getNumParams() << ">>>"; } -void TextNodeDumper::VisitLifetimeExtendedTemporaryDecl( - const LifetimeExtendedTemporaryDecl *D) { - OS << " extended by "; - dumpBareDeclRef(D->getExtendingDecl()); - OS << " mangling "; - { - ColorScope Color(OS, ShowColors, ValueColor); - OS << D->getManglingNumber(); - } -} - void TextNodeDumper::VisitFieldDecl(const FieldDecl *D) { dumpName(D); dumpType(D->getType()); diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index d6c57757cf8c8..8991a39a70679 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -424,9 +424,6 @@ namespace clang { template void mergeMergeable(Mergeable *D); - template <> - void mergeMergeable(Mergeable *D); - void mergeTemplatePattern(RedeclarableTemplateDecl *D, RedeclarableTemplateDecl *Existing, DeclID DsID, bool 
IsKeyDecl); @@ -2361,7 +2358,6 @@ void ASTDeclReader::VisitLifetimeExtendedTemporaryDecl( if (Record.readInt()) D->Value = new (D->getASTContext()) APValue(Record.readAPValue()); D->ManglingNumber = Record.readInt(); - mergeMergeable(D); } std::pair @@ -2559,28 +2555,6 @@ static bool allowODRLikeMergeInC(NamedDecl *ND) { return false; } -/// Attempts to merge LifetimeExtendedTemporaryDecl with -/// identical class definitions from two different modules. -template<> -void ASTDeclReader::mergeMergeable( - Mergeable *D) { - // If modules are not available, there is no reason to perform this merge. - if (!Reader.getContext().getLangOpts().Modules) - return; - - LifetimeExtendedTemporaryDecl *LETDecl = - static_cast(D); - - LifetimeExtendedTemporaryDecl *&LookupResult = - Reader.LETemporaryForMerging[std::make_pair( - LETDecl->getExtendingDecl(), LETDecl->getManglingNumber())]; - if (LookupResult) - Reader.getContext().setPrimaryMergedDecl(LETDecl, - LookupResult->getCanonicalDecl()); - else - LookupResult = LETDecl; -} - /// Attempts to merge the given declaration (D) with another declaration /// of the same entity, for the case where the entity is not actually /// redeclarable. 
This happens, for instance, when merging the fields of diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h deleted file mode 100644 index 8adab29eafc76..0000000000000 --- a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h +++ /dev/null @@ -1,2 +0,0 @@ - -constexpr const int& LETemp = 0; diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h deleted file mode 100644 index 2bd1b096d6073..0000000000000 --- a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h +++ /dev/null @@ -1,4 +0,0 @@ - -#include "a.h" - -constexpr const int* PtrTemp1 = &LETemp; diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h deleted file mode 100644 index b023eebca49c2..0000000000000 --- a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h +++ /dev/null @@ -1,4 +0,0 @@ - -#include "a.h" - -constexpr const int* PtrTemp2 = &LETemp; diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap deleted file mode 100644 index 1339d627a44af..0000000000000 --- a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap +++ /dev/null @@ -1,14 +0,0 @@ -module "a" { - export * - header "a.h" -} - -module "b" { - export * - header "b.h" -} - -module "c" { - export * - header "c.h" -} diff --git a/clang/test/Modules/merge-lifetime-extended-temporary.cpp b/clang/test/Modules/merge-lifetime-extended-temporary.cpp deleted file mode 100644 index 36db948b2c4ef..0000000000000 --- a/clang/test/Modules/merge-lifetime-extended-temporary.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x c++ 
-I%S/Inputs/merge-lifetime-extended-temporary -verify -std=c++11 %s -DORDER=1 -// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x c++ -I%S/Inputs/merge-lifetime-extended-temporary -verify -std=c++11 %s -DORDER=2 - -// expected-no-diagnostics -#if ORDER == 1 -#include "c.h" -#include "b.h" -#else -#include "b.h" -#include "c.h" -#endif - -static_assert(PtrTemp1 == &LETemp, ""); -static_assert(PtrTemp1 == PtrTemp2, ""); From a46b959ebd605e9dc4bc0e041dac3bdf2f3d8989 Mon Sep 17 00:00:00 2001 From: David Green Date: Sat, 30 Nov 2019 14:14:55 +0000 Subject: [PATCH 240/591] [InstCombine] More usub_sat tests. NFC. --- .../builtin-dynamic-object-size.ll | 2 +- .../InstCombine/unsigned_saturated_sub.ll | 299 ++++++++++++++++++ 2 files changed, 300 insertions(+), 1 deletion(-) diff --git a/llvm/test/Transforms/InstCombine/builtin-dynamic-object-size.ll b/llvm/test/Transforms/InstCombine/builtin-dynamic-object-size.ll index eabe3a4c4b7a6..96d0772da38a3 100644 --- a/llvm/test/Transforms/InstCombine/builtin-dynamic-object-size.ll +++ b/llvm/test/Transforms/InstCombine/builtin-dynamic-object-size.ll @@ -1,4 +1,4 @@ -; RUN: opt -instcombine -S < %s | FileCheck %s --dump-input-on-failure +; RUN: opt -instcombine -S < %s | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.14.0" diff --git a/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll b/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll index 44aa7deb4acc5..a5b15040ac726 100644 --- a/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll +++ b/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll @@ -5,6 +5,8 @@ ; usub.sat() intrinsics is tested here. declare void @use(i64) +declare void @usei32(i32) +declare void @usei1(i1) ; (a > b) ? 
a - b : 0 -> usub.sat(a, b) @@ -32,6 +34,52 @@ define i64 @max_sub_uge(i64 %a, i64 %b) { ret i64 %sel } +define i64 @max_sub_uge_extrause1(i64 %a, i64 %b) { +; CHECK-LABEL: @max_sub_uge_extrause1( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[A]], [[B]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 0, i64 [[SUB]] +; CHECK-NEXT: call void @use(i64 [[SUB]]) +; CHECK-NEXT: ret i64 [[SEL]] +; + %cmp = icmp uge i64 %a, %b + %sub = sub i64 %a, %b + %sel = select i1 %cmp, i64 %sub ,i64 0 + call void @use(i64 %sub) + ret i64 %sel +} + +define i64 @max_sub_uge_extrause2(i64 %a, i64 %b) { +; CHECK-LABEL: @max_sub_uge_extrause2( +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i64 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A]], i64 [[B]]) +; CHECK-NEXT: call void @usei1(i1 [[CMP]]) +; CHECK-NEXT: ret i64 [[TMP1]] +; + %cmp = icmp uge i64 %a, %b + %sub = sub i64 %a, %b + %sel = select i1 %cmp, i64 %sub ,i64 0 + call void @usei1(i1 %cmp) + ret i64 %sel +} + +define i64 @max_sub_uge_extrause3(i64 %a, i64 %b) { +; CHECK-LABEL: @max_sub_uge_extrause3( +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i64 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[A]], [[B]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 [[SUB]], i64 0 +; CHECK-NEXT: call void @use(i64 [[SUB]]) +; CHECK-NEXT: call void @usei1(i1 [[CMP]]) +; CHECK-NEXT: ret i64 [[SEL]] +; + %cmp = icmp uge i64 %a, %b + %sub = sub i64 %a, %b + %sel = select i1 %cmp, i64 %sub ,i64 0 + call void @use(i64 %sub) + call void @usei1(i1 %cmp) + ret i64 %sel +} + ; Again, with vectors: ; (a > b) ? 
a - b : 0 -> usub.sat(a, b) @@ -140,6 +188,53 @@ define i64 @neg_max_sub_ugt_sel_swapped(i64 %a, i64 %b) { ret i64 %sel } +define i64 @neg_max_sub_ugt_sel_swapped_extrause1(i64 %a, i64 %b) { +; CHECK-LABEL: @neg_max_sub_ugt_sel_swapped_extrause1( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A]], i64 [[B]]) +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 0, [[TMP1]] +; CHECK-NEXT: call void @usei1(i1 [[CMP]]) +; CHECK-NEXT: ret i64 [[TMP2]] +; + %cmp = icmp ugt i64 %b, %a + %sub = sub i64 %b, %a + %sel = select i1 %cmp, i64 0 ,i64 %sub + call void @usei1(i1 %cmp) + ret i64 %sel +} + +define i64 @neg_max_sub_ugt_sel_swapped_extrause2(i64 %a, i64 %b) { +; CHECK-LABEL: @neg_max_sub_ugt_sel_swapped_extrause2( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[B]], [[A]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 0, i64 [[SUB]] +; CHECK-NEXT: call void @use(i64 [[SUB]]) +; CHECK-NEXT: ret i64 [[SEL]] +; + %cmp = icmp ugt i64 %b, %a + %sub = sub i64 %b, %a + %sel = select i1 %cmp, i64 0 ,i64 %sub + call void @use(i64 %sub) + ret i64 %sel +} + +define i64 @neg_max_sub_ugt_sel_swapped_extrause3(i64 %a, i64 %b) { +; CHECK-LABEL: @neg_max_sub_ugt_sel_swapped_extrause3( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[B]], [[A]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 0, i64 [[SUB]] +; CHECK-NEXT: call void @use(i64 [[SUB]]) +; CHECK-NEXT: call void @usei1(i1 [[CMP]]) +; CHECK-NEXT: ret i64 [[SEL]] +; + %cmp = icmp ugt i64 %b, %a + %sub = sub i64 %b, %a + %sel = select i1 %cmp, i64 0 ,i64 %sub + call void @use(i64 %sub) + call void @usei1(i1 %cmp) + ret i64 %sel +} + ; ((a < b) ? 
0 : b - a) -> -usub.sat(a, b) define i64 @neg_max_sub_ult_sel_swapped(i64 %a, i64 %b) { @@ -158,3 +253,207 @@ define i64 @neg_max_sub_ult_sel_swapped(i64 %a, i64 %b) { ret i64 %sel } +define i32 @max_sub_ugt_c1(i32 %a) { +; CHECK-LABEL: @max_sub_ugt_c1( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[A:%.*]], 1 +; CHECK-NEXT: [[SUB:%.*]] = add i32 [[A]], -1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 0 +; CHECK-NEXT: ret i32 [[SEL]] +; + %cmp = icmp ugt i32 %a, 1 + %sub = add i32 %a, -1 + %sel = select i1 %cmp, i32 %sub ,i32 0 + ret i32 %sel +} + +define i32 @max_sub_ugt_c01(i32 %a) { +; CHECK-LABEL: @max_sub_ugt_c01( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 0 +; CHECK-NEXT: [[SUB:%.*]] = add i32 [[A]], -1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 0, i32 [[SUB]] +; CHECK-NEXT: ret i32 [[SEL]] +; + %cmp = icmp ugt i32 %a, 0 + %sub = add i32 %a, -1 + %sel = select i1 %cmp, i32 %sub ,i32 0 + ret i32 %sel +} + +define i32 @max_sub_ugt_c10(i32 %a) { +; CHECK-LABEL: @max_sub_ugt_c10( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[A:%.*]], 10 +; CHECK-NEXT: [[SUB:%.*]] = add i32 [[A]], -10 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 0 +; CHECK-NEXT: ret i32 [[SEL]] +; + %cmp = icmp ugt i32 %a, 10 + %sub = add i32 %a, -10 + %sel = select i1 %cmp, i32 %sub, i32 0 + ret i32 %sel +} + +define i32 @max_sub_ugt_c910(i32 %a) { +; CHECK-LABEL: @max_sub_ugt_c910( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[A:%.*]], 9 +; CHECK-NEXT: [[SUB:%.*]] = add i32 [[A]], -10 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 0 +; CHECK-NEXT: ret i32 [[SEL]] +; + %cmp = icmp ugt i32 %a, 9 + %sub = add i32 %a, -10 + %sel = select i1 %cmp, i32 %sub, i32 0 + ret i32 %sel +} + +define i32 @max_sub_ugt_c1110(i32 %a) { +; CHECK-LABEL: @max_sub_ugt_c1110( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[A:%.*]], 11 +; CHECK-NEXT: [[SUB:%.*]] = add i32 [[A]], -10 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 0 +; 
CHECK-NEXT: ret i32 [[SEL]] +; + %cmp = icmp ugt i32 %a, 11 + %sub = add i32 %a, -10 + %sel = select i1 %cmp, i32 %sub, i32 0 + ret i32 %sel +} + +define i32 @max_sub_ugt_c0(i32 %a) { +; CHECK-LABEL: @max_sub_ugt_c0( +; CHECK-NEXT: ret i32 0 +; + %cmp = icmp ugt i32 %a, -1 + %sub = add i32 %a, 0 + %sel = select i1 %cmp, i32 %sub, i32 0 + ret i32 %sel +} + +define i32 @max_sub_ugt_cmiss(i32 %a) { +; CHECK-LABEL: @max_sub_ugt_cmiss( +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[A:%.*]], 1 +; CHECK-NEXT: [[SUB:%.*]] = add i32 [[A]], -2 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 0 +; CHECK-NEXT: ret i32 [[SEL]] +; + %cmp = icmp ugt i32 %a, 1 + %sub = add i32 %a, -2 + %sel = select i1 %cmp, i32 %sub, i32 0 + ret i32 %sel +} + +define i32 @max_sub_ult_c1(i32 %a) { +; CHECK-LABEL: @max_sub_ult_c1( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 0 +; CHECK-NEXT: [[SEL:%.*]] = sext i1 [[CMP]] to i32 +; CHECK-NEXT: ret i32 [[SEL]] +; + %cmp = icmp ult i32 %a, 1 + %sub = add i32 %a, -1 + %sel = select i1 %cmp, i32 %sub, i32 0 + ret i32 %sel +} + +define i32 @max_sub_ult_c2(i32 %a) { +; CHECK-LABEL: @max_sub_ult_c2( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[A:%.*]], 2 +; CHECK-NEXT: [[SUB:%.*]] = add i32 [[A]], -2 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 0 +; CHECK-NEXT: ret i32 [[SEL]] +; + %cmp = icmp ult i32 %a, 2 + %sub = add i32 %a, -2 + %sel = select i1 %cmp, i32 %sub, i32 0 + ret i32 %sel +} + +define i32 @max_sub_ult_c2_oneuseicmp(i32 %a) { +; CHECK-LABEL: @max_sub_ult_c2_oneuseicmp( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[A:%.*]], 2 +; CHECK-NEXT: [[SUB:%.*]] = add i32 [[A]], -2 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 0 +; CHECK-NEXT: call void @usei1(i1 [[CMP]]) +; CHECK-NEXT: ret i32 [[SEL]] +; + %cmp = icmp ult i32 %a, 2 + %sub = add i32 %a, -2 + %sel = select i1 %cmp, i32 %sub, i32 0 + call void @usei1(i1 %cmp) + ret i32 %sel +} + +define i32 @max_sub_ult_c2_oneusesub(i32 %a) { +; 
CHECK-LABEL: @max_sub_ult_c2_oneusesub( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[A:%.*]], 2 +; CHECK-NEXT: [[SUB:%.*]] = add i32 [[A]], -2 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 0 +; CHECK-NEXT: call void @usei32(i32 [[SUB]]) +; CHECK-NEXT: ret i32 [[SEL]] +; + %cmp = icmp ult i32 %a, 2 + %sub = add i32 %a, -2 + %sel = select i1 %cmp, i32 %sub, i32 0 + call void @usei32(i32 %sub) + ret i32 %sel +} + +define i32 @max_sub_ult_c32(i32 %a) { +; CHECK-LABEL: @max_sub_ult_c32( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[A:%.*]], 3 +; CHECK-NEXT: [[SUB:%.*]] = add i32 [[A]], -2 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 0 +; CHECK-NEXT: ret i32 [[SEL]] +; + %cmp = icmp ult i32 %a, 3 + %sub = add i32 %a, -2 + %sel = select i1 %cmp, i32 %sub, i32 0 + ret i32 %sel +} + +define i32 @max_sub_ugt_c32(i32 %a) { +; CHECK-LABEL: @max_sub_ugt_c32( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[A:%.*]], 3 +; CHECK-NEXT: [[SUB:%.*]] = add i32 [[A]], -2 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 0 +; CHECK-NEXT: ret i32 [[SEL]] +; + %cmp = icmp ugt i32 3, %a + %sub = add i32 %a, -2 + %sel = select i1 %cmp, i32 %sub, i32 0 + ret i32 %sel +} + +define i32 @max_sub_uge_c32(i32 %a) { +; CHECK-LABEL: @max_sub_uge_c32( +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[A:%.*]], 3 +; CHECK-NEXT: [[SUB:%.*]] = add i32 [[A]], -2 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 0 +; CHECK-NEXT: ret i32 [[SEL]] +; + %cmp = icmp uge i32 2, %a + %sub = add i32 %a, -2 + %sel = select i1 %cmp, i32 %sub, i32 0 + ret i32 %sel +} + +define i32 @max_sub_ult_c12(i32 %a) { +; CHECK-LABEL: @max_sub_ult_c12( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 0 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 -2, i32 0 +; CHECK-NEXT: ret i32 [[SEL]] +; + %cmp = icmp ult i32 %a, 1 + %sub = add i32 %a, -2 + %sel = select i1 %cmp, i32 %sub, i32 0 + ret i32 %sel +} + +define i32 @max_sub_ult_c0(i32 %a) { +; CHECK-LABEL: 
@max_sub_ult_c0( +; CHECK-NEXT: ret i32 0 +; + %cmp = icmp ult i32 %a, 0 + %sub = add i32 %a, -1 + %sel = select i1 %cmp, i32 %sub, i32 0 + ret i32 %sel +} + From 3a1bef5616c3eb466b5f8a076385ee09e5a9ad9a Mon Sep 17 00:00:00 2001 From: David Green Date: Sat, 30 Nov 2019 14:20:55 +0000 Subject: [PATCH 241/591] [InstCombine] Adjust usub_sat fold one use checks This adjusts the one use checks in the the usub_sat fold code to not increase instruction count, but otherwise do the fold. Reviewed as a part of D69514. --- .../InstCombine/InstCombineSelect.cpp | 6 +++--- .../InstCombine/unsigned_saturated_sub.ll | 19 +++++++++---------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index bdfbd75d31a84..0b6b7c3c66d1b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -711,9 +711,9 @@ static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI, else if (!match(TrueVal, m_Sub(m_Specific(A), m_Specific(B)))) return nullptr; - // If sub is used anywhere else, we wouldn't be able to eliminate it - // afterwards. - if (!TrueVal->hasOneUse()) + // If we are adding a negate and the sub and icmp are used anywhere else, we + // would end up with more instructions. + if (IsNegative && !TrueVal->hasOneUse() && !ICI->hasOneUse()) return nullptr; // (a > b) ? 
a - b : 0 -> usub.sat(a, b) diff --git a/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll b/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll index a5b15040ac726..4fa1b182d6713 100644 --- a/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll +++ b/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll @@ -36,11 +36,10 @@ define i64 @max_sub_uge(i64 %a, i64 %b) { define i64 @max_sub_uge_extrause1(i64 %a, i64 %b) { ; CHECK-LABEL: @max_sub_uge_extrause1( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[A]], [[B]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 0, i64 [[SUB]] +; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A]], i64 [[B]]) ; CHECK-NEXT: call void @use(i64 [[SUB]]) -; CHECK-NEXT: ret i64 [[SEL]] +; CHECK-NEXT: ret i64 [[TMP1]] ; %cmp = icmp uge i64 %a, %b %sub = sub i64 %a, %b @@ -67,10 +66,10 @@ define i64 @max_sub_uge_extrause3(i64 %a, i64 %b) { ; CHECK-LABEL: @max_sub_uge_extrause3( ; CHECK-NEXT: [[CMP:%.*]] = icmp uge i64 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[A]], [[B]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 [[SUB]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A]], i64 [[B]]) ; CHECK-NEXT: call void @use(i64 [[SUB]]) ; CHECK-NEXT: call void @usei1(i1 [[CMP]]) -; CHECK-NEXT: ret i64 [[SEL]] +; CHECK-NEXT: ret i64 [[TMP1]] ; %cmp = icmp uge i64 %a, %b %sub = sub i64 %a, %b @@ -205,11 +204,11 @@ define i64 @neg_max_sub_ugt_sel_swapped_extrause1(i64 %a, i64 %b) { define i64 @neg_max_sub_ugt_sel_swapped_extrause2(i64 %a, i64 %b) { ; CHECK-LABEL: @neg_max_sub_ugt_sel_swapped_extrause2( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[B:%.*]], [[A:%.*]] -; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[B]], [[A]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 0, i64 [[SUB]] +; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: 
[[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A]], i64 [[B]]) +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 0, [[TMP1]] ; CHECK-NEXT: call void @use(i64 [[SUB]]) -; CHECK-NEXT: ret i64 [[SEL]] +; CHECK-NEXT: ret i64 [[TMP2]] ; %cmp = icmp ugt i64 %b, %a %sub = sub i64 %b, %a From 59b56e5c579c51e1333b4c6e96d127f50f191c14 Mon Sep 17 00:00:00 2001 From: David Green Date: Sat, 30 Nov 2019 16:39:29 +0000 Subject: [PATCH 242/591] [InstCombine] Expand usub_sat patterns to handle constants The constants come through as add %x, -C, not a sub as would be expected. They need some extra matchers to canonicalise them towards usub_sat. Differential Revision: https://reviews.llvm.org/D69514 --- .../InstCombine/InstCombineSelect.cpp | 14 ++++++-- .../builtin-dynamic-object-size.ll | 6 ++-- .../InstCombine/unsigned_saturated_sub.ll | 33 ++++++++----------- 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 0b6b7c3c66d1b..05a624fde86b6 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -704,11 +704,19 @@ static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI, assert((Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_UGT) && "Unexpected isUnsigned predicate!"); - // Account for swapped form of subtraction: ((a > b) ? b - a : 0). + // Ensure the sub is of the form: + // (a > b) ? a - b : 0 -> usub.sat(a, b) + // (a > b) ? b - a : 0 -> -usub.sat(a, b) + // Checking for both a-b and a+(-b) as a constant. 
bool IsNegative = false; - if (match(TrueVal, m_Sub(m_Specific(B), m_Specific(A)))) + const APInt *C; + if (match(TrueVal, m_Sub(m_Specific(B), m_Specific(A))) || + (match(A, m_APInt(C)) && + match(TrueVal, m_Add(m_Specific(B), m_SpecificInt(-*C))))) IsNegative = true; - else if (!match(TrueVal, m_Sub(m_Specific(A), m_Specific(B)))) + else if (!match(TrueVal, m_Sub(m_Specific(A), m_Specific(B))) && + !(match(B, m_APInt(C)) && + match(TrueVal, m_Add(m_Specific(A), m_SpecificInt(-*C))))) return nullptr; // If we are adding a negate and the sub and icmp are used anywhere else, we diff --git a/llvm/test/Transforms/InstCombine/builtin-dynamic-object-size.ll b/llvm/test/Transforms/InstCombine/builtin-dynamic-object-size.ll index 96d0772da38a3..4093a121060c4 100644 --- a/llvm/test/Transforms/InstCombine/builtin-dynamic-object-size.ll +++ b/llvm/test/Transforms/InstCombine/builtin-dynamic-object-size.ll @@ -48,10 +48,8 @@ entry: ; CHECK: define i64 @internal_pointer(i64 %sz) ; CHECK-NEXT: entry: -; CHECK-NEXT: %0 = add i64 %sz, -2 -; CHECK-NEXT: %1 = icmp ult i64 %sz, 2 -; CHECK-NEXT: %2 = select i1 %1, i64 0, i64 %0 -; CHECK-NEXT: ret i64 %2 +; CHECK-NEXT: %0 = call i64 @llvm.usub.sat.i64(i64 %sz, i64 2) +; CHECK-NEXT: ret i64 %0 ; CHECK-NEXT: } define i64 @uses_nullptr_no_fold() { diff --git a/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll b/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll index 4fa1b182d6713..60da66b7a388a 100644 --- a/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll +++ b/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll @@ -254,10 +254,8 @@ define i64 @neg_max_sub_ult_sel_swapped(i64 %a, i64 %b) { define i32 @max_sub_ugt_c1(i32 %a) { ; CHECK-LABEL: @max_sub_ugt_c1( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[A:%.*]], 1 -; CHECK-NEXT: [[SUB:%.*]] = add i32 [[A]], -1 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 0 -; CHECK-NEXT: ret i32 [[SEL]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 
@llvm.usub.sat.i32(i32 [[A:%.*]], i32 1) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp = icmp ugt i32 %a, 1 %sub = add i32 %a, -1 @@ -280,10 +278,8 @@ define i32 @max_sub_ugt_c01(i32 %a) { define i32 @max_sub_ugt_c10(i32 %a) { ; CHECK-LABEL: @max_sub_ugt_c10( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[A:%.*]], 10 -; CHECK-NEXT: [[SUB:%.*]] = add i32 [[A]], -10 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 0 -; CHECK-NEXT: ret i32 [[SEL]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[A:%.*]], i32 10) +; CHECK-NEXT: ret i32 [[TMP1]] ; %cmp = icmp ugt i32 %a, 10 %sub = add i32 %a, -10 @@ -354,10 +350,9 @@ define i32 @max_sub_ult_c1(i32 %a) { define i32 @max_sub_ult_c2(i32 %a) { ; CHECK-LABEL: @max_sub_ult_c2( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[A:%.*]], 2 -; CHECK-NEXT: [[SUB:%.*]] = add i32 [[A]], -2 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 0 -; CHECK-NEXT: ret i32 [[SEL]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.usub.sat.i32(i32 2, i32 [[A:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i32 0, [[TMP1]] +; CHECK-NEXT: ret i32 [[TMP2]] ; %cmp = icmp ult i32 %a, 2 %sub = add i32 %a, -2 @@ -368,10 +363,10 @@ define i32 @max_sub_ult_c2(i32 %a) { define i32 @max_sub_ult_c2_oneuseicmp(i32 %a) { ; CHECK-LABEL: @max_sub_ult_c2_oneuseicmp( ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[A:%.*]], 2 -; CHECK-NEXT: [[SUB:%.*]] = add i32 [[A]], -2 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.usub.sat.i32(i32 2, i32 [[A]]) +; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i32 0, [[TMP1]] ; CHECK-NEXT: call void @usei1(i1 [[CMP]]) -; CHECK-NEXT: ret i32 [[SEL]] +; CHECK-NEXT: ret i32 [[TMP2]] ; %cmp = icmp ult i32 %a, 2 %sub = add i32 %a, -2 @@ -382,11 +377,11 @@ define i32 @max_sub_ult_c2_oneuseicmp(i32 %a) { define i32 @max_sub_ult_c2_oneusesub(i32 %a) { ; CHECK-LABEL: @max_sub_ult_c2_oneusesub( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[A:%.*]], 2 -; CHECK-NEXT: 
[[SUB:%.*]] = add i32 [[A]], -2 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 0 +; CHECK-NEXT: [[SUB:%.*]] = add i32 [[A:%.*]], -2 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.usub.sat.i32(i32 2, i32 [[A]]) +; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i32 0, [[TMP1]] ; CHECK-NEXT: call void @usei32(i32 [[SUB]]) -; CHECK-NEXT: ret i32 [[SEL]] +; CHECK-NEXT: ret i32 [[TMP2]] ; %cmp = icmp ult i32 %a, 2 %sub = add i32 %a, -2 From 40dfc6dff10bd8881c6df31884e2184bbaab5698 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 30 Nov 2019 11:12:07 -0800 Subject: [PATCH 243/591] [X86] Add floating point execution domain to comi/ucomi/cvtss2si/cvtsd2si/cvttss2si/cvttsd2si/cvtsi2ss/cvtsi2sd instructions. --- llvm/lib/Target/X86/X86InstrAVX512.td | 7 +- llvm/lib/Target/X86/X86InstrSSE.td | 116 ++++++++++++++---------- llvm/test/CodeGen/X86/avx512-cvt.ll | 48 +++++----- llvm/test/CodeGen/X86/ftrunc.ll | 12 +-- llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll | 2 +- llvm/test/CodeGen/X86/pr42905.ll | 2 +- llvm/test/CodeGen/X86/sqrt-partial.ll | 4 +- llvm/test/CodeGen/X86/undef-label.ll | 2 +- llvm/test/CodeGen/X86/vec_fp_to_int.ll | 16 ++-- llvm/test/CodeGen/X86/vec_int_to_fp.ll | 106 +++++++++++----------- 10 files changed, 170 insertions(+), 145 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 637102e47fd36..249da7b888f3d 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -6998,6 +6998,7 @@ multiclass avx512_vcvtsi opc, SDPatternOperator OpNode, X86FoldableSched RegisterClass SrcRC, X86VectorVTInfo DstVT, X86MemOperand x86memop, PatFrag ld_frag, string asm, string mem> { +let ExeDomain = DstVT.ExeDomain in { let hasSideEffects = 0, isCodeGenOnly = 1 in { def rr : SI opc, SDPatternOperator OpNode, X86FoldableSched (OpNode (DstVT.VT DstVT.RC:$src1), (ld_frag addr:$src2)))]>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; +} def : InstAlias<"v"#asm#mem#"\t{$src2, 
$src1, $dst|$dst, $src1, $src2}", (!cast(NAME#"rr_Int") DstVT.RC:$dst, DstVT.RC:$src1, SrcRC:$src2), 0, "att">; @@ -7032,6 +7034,7 @@ multiclass avx512_vcvtsi_round opc, SDNode OpNode, X86FoldableSchedWrite sched, RegisterClass SrcRC, X86VectorVTInfo DstVT, string asm, string mem> { + let ExeDomain = DstVT.ExeDomain in def rrb_Int : SI opc, X86VectorVTInfo SrcVT, SDNode OpNodeRnd, X86FoldableSchedWrite sched, string asm, string aliasStr> { - let Predicates = [HasAVX512] in { + let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in { def rr_Int : SI, @@ -7286,7 +7289,7 @@ multiclass avx512_cvt_s_all opc, string asm, X86VectorVTInfo _SrcRC, X86VectorVTInfo _DstRC, SDNode OpNode, SDNode OpNodeInt, SDNode OpNodeSAE, X86FoldableSchedWrite sched, string aliasStr>{ -let Predicates = [HasAVX512] in { +let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in { let isCodeGenOnly = 1 in { def rr : AVX512 opc, RegisterClass SrcRC, RegisterClass DstRC, SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, string asm, string mem, X86FoldableSchedWrite sched, + Domain d, SchedRead Int2Fpu = ReadDefault> { + let ExeDomain = d in { def rr : SI, @@ -832,6 +834,7 @@ multiclass sse12_cvt_s opc, RegisterClass SrcRC, RegisterClass DstRC, mem#"\t{$src, $dst|$dst, $src}", [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>, Sched<[sched.Folded]>; + } } multiclass sse12_cvt_p opc, RegisterClass RC, X86MemOperand x86memop, @@ -851,8 +854,8 @@ let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in { multiclass sse12_vcvt_avx opc, RegisterClass SrcRC, RegisterClass DstRC, X86MemOperand x86memop, string asm, string mem, - X86FoldableSchedWrite sched> { -let hasSideEffects = 0, Predicates = [UseAVX] in { + X86FoldableSchedWrite sched, Domain d> { +let hasSideEffects = 0, Predicates = [UseAVX], ExeDomain = d in { def rr : SI, Sched<[sched, ReadDefault, ReadInt2Fpu]>; @@ -867,19 +870,19 @@ let hasSideEffects = 0, Predicates = [UseAVX] in { let isCodeGenOnly = 1, Predicates 
= [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in { defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, "cvttss2si", "cvttss2si", - WriteCvtSS2I>, + WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG; defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, "cvttss2si", "cvttss2si", - WriteCvtSS2I>, + WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_W, VEX_LIG; defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, "cvttsd2si", "cvttsd2si", - WriteCvtSD2I>, + WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_LIG; defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, "cvttsd2si", "cvttsd2si", - WriteCvtSD2I>, + WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_W, VEX_LIG; } @@ -889,13 +892,17 @@ defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, // where appropriate to do so. let isCodeGenOnly = 1 in { defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss", "l", - WriteCvtI2SS>, XS, VEX_4V, VEX_LIG, SIMD_EXC; + WriteCvtI2SS, SSEPackedSingle>, XS, VEX_4V, + VEX_LIG, SIMD_EXC; defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss", "q", - WriteCvtI2SS>, XS, VEX_4V, VEX_W, VEX_LIG, SIMD_EXC; + WriteCvtI2SS, SSEPackedSingle>, XS, VEX_4V, + VEX_W, VEX_LIG, SIMD_EXC; defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd", "l", - WriteCvtI2SD>, XD, VEX_4V, VEX_LIG; + WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V, + VEX_LIG; defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd", "q", - WriteCvtI2SD>, XD, VEX_4V, VEX_W, VEX_LIG, SIMD_EXC; + WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V, + VEX_W, VEX_LIG, SIMD_EXC; } // isCodeGenOnly = 1 let Predicates = [UseAVX] in { @@ -921,28 +928,28 @@ let Predicates = [UseAVX] in { let isCodeGenOnly = 1 in { defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, "cvttss2si", "cvttss2si", - WriteCvtSS2I>, XS, SIMD_EXC; + WriteCvtSS2I, SSEPackedSingle>, 
XS, SIMD_EXC; defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, "cvttss2si", "cvttss2si", - WriteCvtSS2I>, XS, REX_W, SIMD_EXC; + WriteCvtSS2I, SSEPackedSingle>, XS, REX_W, SIMD_EXC; defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, "cvttsd2si", "cvttsd2si", - WriteCvtSD2I>, XD, SIMD_EXC; + WriteCvtSD2I, SSEPackedDouble>, XD, SIMD_EXC; defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, "cvttsd2si", "cvttsd2si", - WriteCvtSD2I>, XD, REX_W, SIMD_EXC; + WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC; defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32, "cvtsi2ss", "cvtsi2ss{l}", - WriteCvtI2SS, ReadInt2Fpu>, XS, SIMD_EXC; + WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, SIMD_EXC; defm CVTSI642SS : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64, "cvtsi2ss", "cvtsi2ss{q}", - WriteCvtI2SS, ReadInt2Fpu>, XS, REX_W, SIMD_EXC; + WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, REX_W, SIMD_EXC; defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32, "cvtsi2sd", "cvtsi2sd{l}", - WriteCvtI2SD, ReadInt2Fpu>, XD; + WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD; defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64, "cvtsi2sd", "cvtsi2sd{q}", - WriteCvtI2SD, ReadInt2Fpu>, XD, REX_W, SIMD_EXC; + WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD, REX_W, SIMD_EXC; } // isCodeGenOnly = 1 // Conversion Instructions Intrinsics - Match intrinsics which expect MM @@ -951,7 +958,8 @@ defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64, multiclass sse12_cvt_sint opc, RegisterClass SrcRC, RegisterClass DstRC, ValueType DstVT, ValueType SrcVT, SDNode OpNode, Operand memop, ComplexPattern mem_cpat, string asm, - X86FoldableSchedWrite sched> { + X86FoldableSchedWrite sched, Domain d> { +let ExeDomain = d in { def rr_Int : SI, @@ -961,12 +969,13 @@ multiclass sse12_cvt_sint opc, RegisterClass SrcRC, 
RegisterClass DstRC, [(set DstRC:$dst, (DstVT (OpNode (SrcVT mem_cpat:$src))))]>, Sched<[sched.Folded]>; } +} multiclass sse12_cvt_sint_3addr opc, RegisterClass SrcRC, RegisterClass DstRC, X86MemOperand x86memop, string asm, string mem, X86FoldableSchedWrite sched, - bit Is2Addr = 1> { -let hasSideEffects = 0 in { + Domain d, bit Is2Addr = 1> { +let hasSideEffects = 0, ExeDomain = d in { def rr_Int : SI, XD, VEX, VEX_LIG; + WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_LIG; defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si, sdmem, sse_load_f64, "cvtsd2si", - WriteCvtSD2I>, XD, VEX, VEX_W, VEX_LIG; + WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_W, VEX_LIG; } defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, X86cvts2si, - sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD; + sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I, + SSEPackedDouble>, XD; defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si, - sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD, REX_W; + sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I, + SSEPackedDouble>, XD, REX_W; } let Predicates = [UseAVX] in { defm VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - i32mem, "cvtsi2ss", "l", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG, SIMD_EXC; + i32mem, "cvtsi2ss", "l", WriteCvtI2SS, SSEPackedSingle, 0>, + XS, VEX_4V, VEX_LIG, SIMD_EXC; defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - i64mem, "cvtsi2ss", "q", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC; + i64mem, "cvtsi2ss", "q", WriteCvtI2SS, SSEPackedSingle, 0>, + XS, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC; defm VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - i32mem, "cvtsi2sd", "l", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG; + i32mem, "cvtsi2sd", "l", WriteCvtI2SD, SSEPackedDouble, 0>, + XD, VEX_4V, VEX_LIG; defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - i64mem, "cvtsi2sd", "q", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC; + i64mem, "cvtsi2sd", "q", 
WriteCvtI2SD, SSEPackedDouble, 0>, + XD, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC; } let Constraints = "$src1 = $dst" in { defm CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - i32mem, "cvtsi2ss", "l", WriteCvtI2SS>, XS, SIMD_EXC; + i32mem, "cvtsi2ss", "l", WriteCvtI2SS, SSEPackedSingle>, + XS, SIMD_EXC; defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - i64mem, "cvtsi2ss", "q", WriteCvtI2SS>, XS, REX_W, SIMD_EXC; + i64mem, "cvtsi2ss", "q", WriteCvtI2SS, SSEPackedSingle>, + XS, REX_W, SIMD_EXC; defm CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - i32mem, "cvtsi2sd", "l", WriteCvtI2SD>, XD; + i32mem, "cvtsi2sd", "l", WriteCvtI2SD, SSEPackedDouble>, + XD; defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - i64mem, "cvtsi2sd", "q", WriteCvtI2SD>, XD, REX_W, SIMD_EXC; + i64mem, "cvtsi2sd", "q", WriteCvtI2SD, SSEPackedDouble>, + XD, REX_W, SIMD_EXC; } def : InstAlias<"vcvtsi2ss{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -1052,32 +1071,34 @@ def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}", let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in { defm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int, ssmem, sse_load_f32, "cvttss2si", - WriteCvtSS2I>, XS, VEX, VEX_LIG; + WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG; defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32, X86cvtts2Int, ssmem, sse_load_f32, - "cvttss2si", WriteCvtSS2I>, + "cvttss2si", WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG, VEX_W; defm VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int, sdmem, sse_load_f64, "cvttsd2si", - WriteCvtSS2I>, XD, VEX, VEX_LIG; + WriteCvtSS2I, SSEPackedDouble>, XD, VEX, VEX_LIG; defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64, X86cvtts2Int, sdmem, sse_load_f64, - "cvttsd2si", WriteCvtSS2I>, + "cvttsd2si", WriteCvtSS2I, SSEPackedDouble>, XD, VEX, VEX_LIG, VEX_W; } let Uses = [MXCSR], mayRaiseFPException = 1 in { defm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, 
GR32, i32, v4f32, X86cvtts2Int, ssmem, sse_load_f32, "cvttss2si", - WriteCvtSS2I>, XS; + WriteCvtSS2I, SSEPackedSingle>, XS; defm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32, X86cvtts2Int, ssmem, sse_load_f32, - "cvttss2si", WriteCvtSS2I>, XS, REX_W; + "cvttss2si", WriteCvtSS2I, SSEPackedSingle>, + XS, REX_W; defm CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int, sdmem, sse_load_f64, "cvttsd2si", - WriteCvtSD2I>, XD; + WriteCvtSD2I, SSEPackedDouble>, XD; defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64, X86cvtts2Int, sdmem, sse_load_f64, - "cvttsd2si", WriteCvtSD2I>, XD, REX_W; + "cvttsd2si", WriteCvtSD2I, SSEPackedDouble>, + XD, REX_W; } def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}", @@ -1117,18 +1138,18 @@ def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}", let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in { defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si, ssmem, sse_load_f32, "cvtss2si", - WriteCvtSS2I>, XS, VEX, VEX_LIG; + WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG; defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si, ssmem, sse_load_f32, "cvtss2si", - WriteCvtSS2I>, XS, VEX, VEX_W, VEX_LIG; + WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_W, VEX_LIG; } let Uses = [MXCSR], mayRaiseFPException = 1 in { defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si, ssmem, sse_load_f32, "cvtss2si", - WriteCvtSS2I>, XS; + WriteCvtSS2I, SSEPackedSingle>, XS; defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si, ssmem, sse_load_f32, "cvtss2si", - WriteCvtSS2I>, XS, REX_W; + WriteCvtSS2I, SSEPackedSingle>, XS, REX_W; defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, load, "vcvtdq2ps\t{$src, $dst|$dst, $src}", @@ -1817,7 +1838,8 @@ multiclass sse12_ord_cmp opc, RegisterClass RC, SDNode OpNode, ValueType vt, X86MemOperand x86memop, PatFrag ld_frag, string OpcodeStr, Domain d, 
X86FoldableSchedWrite sched = WriteFCom> { -let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in { +let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1, + ExeDomain = d in { def rr: SI, @@ -1837,7 +1859,7 @@ multiclass sse12_ord_cmp_int opc, RegisterClass RC, SDNode OpNode, ComplexPattern mem_cpat, string OpcodeStr, Domain d, X86FoldableSchedWrite sched = WriteFCom> { -let Uses = [MXCSR], mayRaiseFPException = 1 in { +let Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain = d in { def rr_Int: SI, diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll index 6f7247388640a..e6b43c07fe056 100644 --- a/llvm/test/CodeGen/X86/avx512-cvt.ll +++ b/llvm/test/CodeGen/X86/avx512-cvt.ll @@ -25,25 +25,25 @@ define <8 x double> @sltof864(<8 x i64> %a) { ; NODQ-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 ; NODQ-NEXT: vmovq %xmm1, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3 ; NODQ-NEXT: vmovq %xmm2, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm2 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3 ; NODQ-NEXT: vmovq %xmm2, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; NODQ-NEXT: vpextrq $1, %xmm0, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3 ; NODQ-NEXT: vmovq %xmm0, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm0 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0] ; NODQ-NEXT: vinsertf128 $1, %xmm2, 
%ymm0, %ymm0 ; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; NODQ-NEXT: retq @@ -69,12 +69,12 @@ define <4 x double> @slto4f64(<4 x i64> %a) { ; NODQ-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 ; NODQ-NEXT: vmovq %xmm1, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; NODQ-NEXT: vpextrq $1, %xmm0, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 ; NODQ-NEXT: vmovq %xmm0, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; NODQ-NEXT: retq ; @@ -100,7 +100,7 @@ define <2 x double> @slto2f64(<2 x i64> %a) { ; NODQ-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 ; NODQ-NEXT: vmovq %xmm0, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; NODQ-NEXT: retq ; ; VLDQ-LABEL: slto2f64: @@ -140,7 +140,7 @@ define <2 x float> @sltof2f32(<2 x i64> %a) { ; VLNODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 ; VLNODQ-NEXT: vmovq %xmm0, %rax ; VLNODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; VLNODQ-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; VLNODQ-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; VLNODQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; VLNODQ-NEXT: retq ; @@ -1040,13 +1040,13 @@ define <16 x float> @slto16f32(<16 x i64> %a) { ; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1 ; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0] ; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 -; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2 +; NODQ-NEXT: vextractf32x4 $2, %zmm0, %xmm2 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax ; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3 ; NODQ-NEXT: vmovq %xmm2, %rax ; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm2 ; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3] -; NODQ-NEXT: 
vextracti32x4 $3, %zmm0, %xmm3 +; NODQ-NEXT: vextractf32x4 $3, %zmm0, %xmm3 ; NODQ-NEXT: vmovq %xmm3, %rax ; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4 ; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3] @@ -1094,25 +1094,25 @@ define <8 x double> @slto8f64(<8 x i64> %a) { ; NODQ-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 ; NODQ-NEXT: vmovq %xmm1, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3 ; NODQ-NEXT: vmovq %xmm2, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm2 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3 ; NODQ-NEXT: vmovq %xmm2, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; NODQ-NEXT: vpextrq $1, %xmm0, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3 ; NODQ-NEXT: vmovq %xmm0, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm0 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0] ; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; NODQ-NEXT: retq @@ -1138,25 +1138,25 @@ define <16 x double> @slto16f64(<16 x i64> %a) { ; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3 ; NODQ-NEXT: vmovq %xmm2, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm3 ; NODQ-NEXT: vpextrq $1, %xmm3, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm4 ; NODQ-NEXT: vmovq %xmm3, 
%rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 ; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm3 ; NODQ-NEXT: vpextrq $1, %xmm3, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm4 ; NODQ-NEXT: vmovq %xmm3, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; NODQ-NEXT: vpextrq $1, %xmm0, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm4 ; NODQ-NEXT: vmovq %xmm0, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm0 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0] ; NODQ-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 ; NODQ-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 ; NODQ-NEXT: vextracti32x4 $3, %zmm1, %xmm2 @@ -1164,25 +1164,25 @@ define <16 x double> @slto16f64(<16 x i64> %a) { ; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3 ; NODQ-NEXT: vmovq %xmm2, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm2 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; NODQ-NEXT: vextracti32x4 $2, %zmm1, %xmm3 ; NODQ-NEXT: vpextrq $1, %xmm3, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm4 ; NODQ-NEXT: vmovq %xmm3, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 ; NODQ-NEXT: vextracti128 $1, %ymm1, %xmm3 ; NODQ-NEXT: vpextrq $1, %xmm3, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm4 ; NODQ-NEXT: vmovq %xmm3, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; NODQ-NEXT: vpextrq $1, %xmm1, %rax ; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm4 ; NODQ-NEXT: vmovq %xmm1, %rax ; 
NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm1 -; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0] +; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0] ; NODQ-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 ; NODQ-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1 ; NODQ-NEXT: retq @@ -1275,13 +1275,13 @@ define <16 x float> @ulto16f32(<16 x i64> %a) { ; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm1 ; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0] ; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 -; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2 +; NODQ-NEXT: vextractf32x4 $2, %zmm0, %xmm2 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax ; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm3 ; NODQ-NEXT: vmovq %xmm2, %rax ; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm2 ; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3] -; NODQ-NEXT: vextracti32x4 $3, %zmm0, %xmm3 +; NODQ-NEXT: vextractf32x4 $3, %zmm0, %xmm3 ; NODQ-NEXT: vmovq %xmm3, %rax ; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm4 ; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3] diff --git a/llvm/test/CodeGen/X86/ftrunc.ll b/llvm/test/CodeGen/X86/ftrunc.ll index 448c21d93ac8e..92118100bba84 100644 --- a/llvm/test/CodeGen/X86/ftrunc.ll +++ b/llvm/test/CodeGen/X86/ftrunc.ll @@ -289,12 +289,12 @@ define <2 x double> @trunc_signed_v2f64(<2 x double> %x) #0 { ; SSE2-LABEL: trunc_signed_v2f64: ; SSE2: # %bb.0: ; SSE2-NEXT: cvttsd2si %xmm0, %rax -; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] ; SSE2-NEXT: cvttsd2si %xmm0, %rcx ; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2sd %rax, %xmm0 ; SSE2-NEXT: cvtsi2sd %rcx, %xmm1 -; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: trunc_signed_v2f64: @@ -315,20 +315,20 @@ define <4 x double> @trunc_signed_v4f64(<4 x double> %x) #0 { ; SSE2-LABEL: trunc_signed_v4f64: ; SSE2: # %bb.0: ; SSE2-NEXT: cvttsd2si %xmm1, %rax -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = 
xmm1[1,1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] ; SSE2-NEXT: cvttsd2si %xmm1, %rcx ; SSE2-NEXT: cvttsd2si %xmm0, %rdx -; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] ; SSE2-NEXT: cvttsd2si %xmm0, %rsi ; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2sd %rdx, %xmm0 ; SSE2-NEXT: xorps %xmm1, %xmm1 ; SSE2-NEXT: cvtsi2sd %rsi, %xmm1 -; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: xorps %xmm1, %xmm1 ; SSE2-NEXT: cvtsi2sd %rax, %xmm1 ; SSE2-NEXT: cvtsi2sd %rcx, %xmm2 -; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: trunc_signed_v4f64: diff --git a/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll b/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll index 8d43a1b73234c..980956bdaa88c 100644 --- a/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll +++ b/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll @@ -321,7 +321,7 @@ define i32 @test_zext_cmp11(double %a, double %b) "no-nans-fp-math"="true" { ; ; ALL-LABEL: test_zext_cmp11: ; ALL: # %bb.0: # %entry -; ALL-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; ALL-NEXT: vxorpd %xmm2, %xmm2, %xmm2 ; ALL-NEXT: vucomisd %xmm2, %xmm0 ; ALL-NEXT: sete %al ; ALL-NEXT: vucomisd %xmm2, %xmm1 diff --git a/llvm/test/CodeGen/X86/pr42905.ll b/llvm/test/CodeGen/X86/pr42905.ll index bb51aced225c6..310a173f824e9 100644 --- a/llvm/test/CodeGen/X86/pr42905.ll +++ b/llvm/test/CodeGen/X86/pr42905.ll @@ -11,7 +11,7 @@ define <4 x double> @autogen_SD30452(i1 %L230) { ; CHECK-NEXT: movq %xmm2, %rax ; CHECK-NEXT: xorps %xmm2, %xmm2 ; CHECK-NEXT: cvtsi2sd %rax, %xmm2 -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; CHECK-NEXT: cvtdq2pd %xmm1, %xmm1 ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/sqrt-partial.ll b/llvm/test/CodeGen/X86/sqrt-partial.ll 
index 7ed68c1084998..48914d8ed44e0 100644 --- a/llvm/test/CodeGen/X86/sqrt-partial.ll +++ b/llvm/test/CodeGen/X86/sqrt-partial.ll @@ -38,7 +38,7 @@ define float @f(float %val) nounwind { define double @d(double %val) nounwind { ; SSE-LABEL: d: ; SSE: # %bb.0: -; SSE-NEXT: xorps %xmm1, %xmm1 +; SSE-NEXT: xorpd %xmm1, %xmm1 ; SSE-NEXT: ucomisd %xmm1, %xmm0 ; SSE-NEXT: jb .LBB1_2 ; SSE-NEXT: # %bb.1: # %.split @@ -49,7 +49,7 @@ define double @d(double %val) nounwind { ; ; AVX-LABEL: d: ; AVX: # %bb.0: -; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vucomisd %xmm1, %xmm0 ; AVX-NEXT: jb .LBB1_2 ; AVX-NEXT: # %bb.1: # %.split diff --git a/llvm/test/CodeGen/X86/undef-label.ll b/llvm/test/CodeGen/X86/undef-label.ll index b4be383d55ddc..56e0ca907f8e1 100644 --- a/llvm/test/CodeGen/X86/undef-label.ll +++ b/llvm/test/CodeGen/X86/undef-label.ll @@ -11,7 +11,7 @@ define void @xyz() { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl $g, %eax ; CHECK-NEXT: movq %rax, %xmm0 -; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: xorpd %xmm1, %xmm1 ; CHECK-NEXT: ucomisd %xmm1, %xmm0 ; CHECK-NEXT: jne .LBB0_1 ; CHECK-NEXT: jnp .LBB0_2 diff --git a/llvm/test/CodeGen/X86/vec_fp_to_int.ll b/llvm/test/CodeGen/X86/vec_fp_to_int.ll index fc3233327a558..bf2ea5e067cc1 100644 --- a/llvm/test/CodeGen/X86/vec_fp_to_int.ll +++ b/llvm/test/CodeGen/X86/vec_fp_to_int.ll @@ -21,7 +21,7 @@ define <2 x i64> @fptosi_2f64_to_2i64(<2 x double> %a) { ; SSE: # %bb.0: ; SSE-NEXT: cvttsd2si %xmm0, %rax ; SSE-NEXT: movq %rax, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: cvttsd2si %xmm0, %rax ; SSE-NEXT: movq %rax, %xmm0 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] @@ -125,13 +125,13 @@ define <4 x i64> @fptosi_4f64_to_4i64(<4 x double> %a) { ; SSE: # %bb.0: ; SSE-NEXT: cvttsd2si %xmm0, %rax ; SSE-NEXT: movq %rax, %xmm2 -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} 
xmm0 = xmm0[1,1] ; SSE-NEXT: cvttsd2si %xmm0, %rax ; SSE-NEXT: movq %rax, %xmm0 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0] ; SSE-NEXT: cvttsd2si %xmm1, %rax ; SSE-NEXT: movq %rax, %xmm3 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: cvttsd2si %xmm1, %rax ; SSE-NEXT: movq %rax, %xmm0 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0] @@ -335,7 +335,7 @@ define <4 x i32> @fptoui_2f64_to_4i32(<2 x double> %a) { ; SSE-LABEL: fptoui_2f64_to_4i32: ; SSE: # %bb.0: ; SSE-NEXT: cvttsd2si %xmm0, %rax -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: cvttsd2si %xmm0, %rcx ; SSE-NEXT: movd %eax, %xmm0 ; SSE-NEXT: movd %ecx, %xmm1 @@ -409,7 +409,7 @@ define <4 x i32> @fptoui_2f64_to_2i32(<2 x double> %a) { ; SSE: # %bb.0: ; SSE-NEXT: cvttsd2si %xmm0, %rax ; SSE-NEXT: movd %eax, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: cvttsd2si %xmm0, %rax ; SSE-NEXT: movd %eax, %xmm0 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] @@ -482,7 +482,7 @@ define <4 x i32> @fptoui_4f64_to_2i32(<2 x double> %a) { ; SSE: # %bb.0: ; SSE-NEXT: cvttsd2si %xmm0, %rax ; SSE-NEXT: movd %eax, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: cvttsd2si %xmm0, %rax ; SSE-NEXT: movd %eax, %xmm0 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] @@ -734,13 +734,13 @@ define <4 x i32> @fptoui_4f64_to_4i32(<4 x double> %a) { ; SSE: # %bb.0: ; SSE-NEXT: cvttsd2si %xmm1, %rax ; SSE-NEXT: movd %eax, %xmm2 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: cvttsd2si %xmm1, %rax ; SSE-NEXT: movd %eax, %xmm1 ; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSE-NEXT: cvttsd2si %xmm0, %rax ; SSE-NEXT: movd %eax, %xmm1 -; SSE-NEXT: movhlps 
{{.*#+}} xmm0 = xmm0[1,1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: cvttsd2si %xmm0, %rax ; SSE-NEXT: movd %eax, %xmm0 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll index 269879e7f1a31..1d0106b75a84f 100644 --- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll +++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll @@ -27,8 +27,8 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) { ; SSE2-NEXT: movq %xmm0, %rax ; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2sd %rax, %xmm0 -; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; SSE2-NEXT: movapd %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: sitofp_2i64_to_2f64: @@ -38,7 +38,7 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) { ; SSE41-NEXT: movq %xmm0, %rax ; SSE41-NEXT: xorps %xmm0, %xmm0 ; SSE41-NEXT: cvtsi2sd %rax, %xmm0 -; SSE41-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE41-NEXT: retq ; ; VEX-LABEL: sitofp_2i64_to_2f64: @@ -47,7 +47,7 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) { ; VEX-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 ; VEX-NEXT: vmovq %xmm0, %rax ; VEX-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 -; VEX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; VEX-NEXT: retq ; ; AVX512F-LABEL: sitofp_2i64_to_2f64: @@ -56,7 +56,7 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) { ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 ; AVX512F-NEXT: vmovq %xmm0, %rax ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 -; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: sitofp_2i64_to_2f64: @@ -65,7 +65,7 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) { ; AVX512VL-NEXT: 
vcvtsi2sd %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: sitofp_2i64_to_2f64: @@ -237,16 +237,16 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) { ; SSE2-NEXT: movq %xmm0, %rax ; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2sd %rax, %xmm0 -; SSE2-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0] +; SSE2-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0] ; SSE2-NEXT: movq %xmm1, %rax ; SSE2-NEXT: cvtsi2sd %rax, %xmm3 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] ; SSE2-NEXT: movq %xmm0, %rax ; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2sd %rax, %xmm0 -; SSE2-NEXT: movlhps {{.*#+}} xmm3 = xmm3[0],xmm0[0] -; SSE2-NEXT: movaps %xmm2, %xmm0 -; SSE2-NEXT: movaps %xmm3, %xmm1 +; SSE2-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0] +; SSE2-NEXT: movapd %xmm2, %xmm0 +; SSE2-NEXT: movapd %xmm3, %xmm1 ; SSE2-NEXT: retq ; ; SSE41-LABEL: sitofp_4i64_to_4f64: @@ -256,14 +256,14 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) { ; SSE41-NEXT: movq %xmm0, %rax ; SSE41-NEXT: xorps %xmm0, %xmm0 ; SSE41-NEXT: cvtsi2sd %rax, %xmm0 -; SSE41-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE41-NEXT: pextrq $1, %xmm1, %rax ; SSE41-NEXT: xorps %xmm2, %xmm2 ; SSE41-NEXT: cvtsi2sd %rax, %xmm2 ; SSE41-NEXT: movq %xmm1, %rax ; SSE41-NEXT: xorps %xmm1, %xmm1 ; SSE41-NEXT: cvtsi2sd %rax, %xmm1 -; SSE41-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE41-NEXT: retq ; ; AVX1-LABEL: sitofp_4i64_to_4f64: @@ -273,12 +273,12 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) { ; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 ; AVX1-NEXT: vmovq %xmm1, %rax ; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 -; AVX1-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; 
AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX1-NEXT: vpextrq $1, %xmm0, %rax ; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 ; AVX1-NEXT: vmovq %xmm0, %rax ; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 -; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; @@ -289,12 +289,12 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) { ; AVX2-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 ; AVX2-NEXT: vmovq %xmm1, %rax ; AVX2-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 -; AVX2-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX2-NEXT: vpextrq $1, %xmm0, %rax ; AVX2-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 ; AVX2-NEXT: vmovq %xmm0, %rax ; AVX2-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 -; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; @@ -305,12 +305,12 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) { ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 ; AVX512F-NEXT: vmovq %xmm1, %rax ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 -; AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 ; AVX512F-NEXT: vmovq %xmm0, %rax ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 -; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX512F-NEXT: retq ; @@ -321,12 +321,12 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) { ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 ; AVX512VL-NEXT: vmovq %xmm1, %rax ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 -; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX512VL-NEXT: vunpcklpd {{.*#+}} 
xmm1 = xmm1[0],xmm2[0] ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 -; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; @@ -1204,7 +1204,7 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) { ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX512VL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: retq ; @@ -1235,7 +1235,7 @@ define <4 x float> @sitofp_2i64_to_4f32_zero(<2 x i64> %a) { ; SSE2-NEXT: movq %xmm0, %rax ; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0 -; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero ; SSE2-NEXT: retq ; @@ -1274,7 +1274,7 @@ define <4 x float> @sitofp_2i64_to_4f32_zero(<2 x i64> %a) { ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; AVX512VL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: retq ; @@ -1304,7 +1304,7 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; SSE2-NEXT: movq %xmm0, %rax ; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0 -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: movq 
{{.*#+}} xmm0 = xmm1[0],zero ; SSE2-NEXT: retq ; @@ -1342,7 +1342,7 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX512VL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: retq ; @@ -1927,7 +1927,7 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) { ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX512VL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: retq ; @@ -2074,7 +2074,7 @@ define <4 x float> @uitofp_2i64_to_2f32(<2 x i64> %a) { ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; AVX512VL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: retq ; @@ -2216,7 +2216,7 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX512VL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: retq ; @@ -3023,7 +3023,7 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) { ; SSE2-NEXT: movq %xmm1, %rax ; SSE2-NEXT: xorps %xmm1, %xmm1 ; SSE2-NEXT: cvtsi2sd %rax, %xmm1 -; 
SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: sitofp_load_2i64_to_2f64: @@ -3034,7 +3034,7 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) { ; SSE41-NEXT: movq %xmm0, %rax ; SSE41-NEXT: xorps %xmm0, %xmm0 ; SSE41-NEXT: cvtsi2sd %rax, %xmm0 -; SSE41-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE41-NEXT: retq ; ; VEX-LABEL: sitofp_load_2i64_to_2f64: @@ -3044,7 +3044,7 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) { ; VEX-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 ; VEX-NEXT: vmovq %xmm0, %rax ; VEX-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 -; VEX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; VEX-NEXT: retq ; ; AVX512F-LABEL: sitofp_load_2i64_to_2f64: @@ -3054,7 +3054,7 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) { ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 ; AVX512F-NEXT: vmovq %xmm0, %rax ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 -; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: sitofp_load_2i64_to_2f64: @@ -3064,7 +3064,7 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) { ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: sitofp_load_2i64_to_2f64: @@ -3220,7 +3220,7 @@ define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) { ; SSE2-NEXT: movq %xmm1, %rax ; SSE2-NEXT: xorps %xmm1, %xmm1 ; SSE2-NEXT: cvtsi2sd %rax, %xmm1 -; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: movq %xmm2, 
%rax ; SSE2-NEXT: xorps %xmm1, %xmm1 ; SSE2-NEXT: cvtsi2sd %rax, %xmm1 @@ -3228,7 +3228,7 @@ define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) { ; SSE2-NEXT: movq %xmm2, %rax ; SSE2-NEXT: xorps %xmm2, %xmm2 ; SSE2-NEXT: cvtsi2sd %rax, %xmm2 -; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: sitofp_load_4i64_to_4f64: @@ -3240,64 +3240,64 @@ define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) { ; SSE41-NEXT: movq %xmm0, %rax ; SSE41-NEXT: xorps %xmm0, %xmm0 ; SSE41-NEXT: cvtsi2sd %rax, %xmm0 -; SSE41-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE41-NEXT: pextrq $1, %xmm1, %rax ; SSE41-NEXT: xorps %xmm2, %xmm2 ; SSE41-NEXT: cvtsi2sd %rax, %xmm2 ; SSE41-NEXT: movq %xmm1, %rax ; SSE41-NEXT: xorps %xmm1, %xmm1 ; SSE41-NEXT: cvtsi2sd %rax, %xmm1 -; SSE41-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE41-NEXT: retq ; ; VEX-LABEL: sitofp_load_4i64_to_4f64: ; VEX: # %bb.0: -; VEX-NEXT: vmovdqa (%rdi), %xmm0 +; VEX-NEXT: vmovapd (%rdi), %xmm0 ; VEX-NEXT: vmovdqa 16(%rdi), %xmm1 ; VEX-NEXT: vpextrq $1, %xmm1, %rax ; VEX-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 ; VEX-NEXT: vmovq %xmm1, %rax ; VEX-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 -; VEX-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; VEX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; VEX-NEXT: vpextrq $1, %xmm0, %rax ; VEX-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 ; VEX-NEXT: vmovq %xmm0, %rax ; VEX-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 -; VEX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; VEX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; VEX-NEXT: retq ; ; AVX512F-LABEL: sitofp_load_4i64_to_4f64: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa (%rdi), %xmm0 +; AVX512F-NEXT: vmovapd (%rdi), %xmm0 ; AVX512F-NEXT: vmovdqa 16(%rdi), %xmm1 ; AVX512F-NEXT: 
vpextrq $1, %xmm1, %rax ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 ; AVX512F-NEXT: vmovq %xmm1, %rax ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 -; AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 ; AVX512F-NEXT: vmovq %xmm0, %rax ; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 -; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: sitofp_load_4i64_to_4f64: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0 +; AVX512VL-NEXT: vmovapd (%rdi), %xmm0 ; AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm1 ; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 ; AVX512VL-NEXT: vmovq %xmm1, %rax ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 -; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 -; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; @@ -4288,7 +4288,7 @@ define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; ; VEX-LABEL: sitofp_load_8i64_to_8f32: ; VEX: # %bb.0: -; VEX-NEXT: vmovdqa (%rdi), %xmm0 +; VEX-NEXT: vmovaps (%rdi), %xmm0 ; VEX-NEXT: vmovdqa 16(%rdi), %xmm1 ; VEX-NEXT: vmovdqa 32(%rdi), %xmm2 ; VEX-NEXT: vmovdqa 48(%rdi), %xmm3 @@ -4319,7 +4319,7 @@ define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; ; AVX512F-LABEL: sitofp_load_8i64_to_8f32: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa (%rdi), %xmm0 +; AVX512F-NEXT: vmovaps (%rdi), 
%xmm0 ; AVX512F-NEXT: vmovdqa 16(%rdi), %xmm1 ; AVX512F-NEXT: vmovdqa 32(%rdi), %xmm2 ; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm3 @@ -4350,7 +4350,7 @@ define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; ; AVX512VL-LABEL: sitofp_load_8i64_to_8f32: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0 +; AVX512VL-NEXT: vmovaps (%rdi), %xmm0 ; AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm1 ; AVX512VL-NEXT: vmovdqa 32(%rdi), %xmm2 ; AVX512VL-NEXT: vmovdqa 48(%rdi), %xmm3 @@ -4648,7 +4648,7 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) { ; VEX-LABEL: uitofp_load_4i64_to_4f32: ; VEX: # %bb.0: ; VEX-NEXT: vmovdqa (%rdi), %xmm2 -; VEX-NEXT: vmovdqa 16(%rdi), %xmm0 +; VEX-NEXT: vmovaps 16(%rdi), %xmm0 ; VEX-NEXT: vpextrq $1, %xmm2, %rax ; VEX-NEXT: testq %rax, %rax ; VEX-NEXT: js .LBB81_1 @@ -5167,7 +5167,7 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; VEX-LABEL: uitofp_load_8i64_to_8f32: ; VEX: # %bb.0: ; VEX-NEXT: vmovdqa (%rdi), %xmm1 -; VEX-NEXT: vmovdqa 16(%rdi), %xmm0 +; VEX-NEXT: vmovaps 16(%rdi), %xmm0 ; VEX-NEXT: vmovdqa 32(%rdi), %xmm4 ; VEX-NEXT: vmovdqa 48(%rdi), %xmm3 ; VEX-NEXT: vpextrq $1, %xmm4, %rax @@ -5293,7 +5293,7 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; ; AVX512F-LABEL: uitofp_load_8i64_to_8f32: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa (%rdi), %xmm0 +; AVX512F-NEXT: vmovaps (%rdi), %xmm0 ; AVX512F-NEXT: vmovdqa 16(%rdi), %xmm1 ; AVX512F-NEXT: vmovdqa 32(%rdi), %xmm2 ; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm3 @@ -5324,7 +5324,7 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; ; AVX512VL-LABEL: uitofp_load_8i64_to_8f32: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0 +; AVX512VL-NEXT: vmovaps (%rdi), %xmm0 ; AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm1 ; AVX512VL-NEXT: vmovdqa 32(%rdi), %xmm2 ; AVX512VL-NEXT: vmovdqa 48(%rdi), %xmm3 From 8682d29a28772bab0283a141b8701bfaea8881a6 Mon Sep 17 00:00:00 2001 From: Brian Gesiak Date: Sat, 30 Nov 2019 15:36:35 -0500 
Subject: [PATCH 244/591] [Format] Add format check for coroutine keywords with negative numbers Summary: As a followup to D69144, this diff fixes the coroutine keyword spacing for co_yield / co_returning negative numbers. Reviewers: modocache, sammccall, Quuxplusone Reviewed By: modocache Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D69180 Patch by Jonathan Thomas (jonathoma)! --- clang/lib/Format/TokenAnnotator.cpp | 3 ++- clang/unittests/Format/FormatTest.cpp | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 9fe7fdc9ce937..74ff5b531496e 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1829,7 +1829,8 @@ class AnnotatingParser { // Use heuristics to recognize unary operators. if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square, tok::question, tok::colon, tok::kw_return, - tok::kw_case, tok::at, tok::l_brace, tok::kw_throw)) + tok::kw_case, tok::at, tok::l_brace, tok::kw_throw, + tok::kw_co_return, tok::kw_co_yield)) return TT_UnaryOperator; // There can't be two consecutive binary operators. diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index d89ad44e4577f..e6fa829e7ff91 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -6990,6 +6990,9 @@ TEST_F(FormatTest, UnderstandsUnaryOperators) { verifyFormat("int a = /* confusing comment */ -1;"); // FIXME: The space after 'i' is wrong, but hopefully, this is a rare case. 
verifyFormat("int a = i /* confusing comment */++;"); + + verifyFormat("co_yield -1;"); + verifyFormat("co_return -1;"); } TEST_F(FormatTest, DoesNotIndentRelativeToUnaryOperators) { From 85c74384778909789389b9012a75cfcca7964a28 Mon Sep 17 00:00:00 2001 From: Tyker Date: Sat, 30 Nov 2019 16:42:33 +0100 Subject: [PATCH 245/591] [clang][modules] Add support for merging lifetime-extended temporaries Summary: Add support for merging lifetime-extended temporaries Reviewers: rsmith Reviewed By: rsmith Subscribers: xbolva00, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D70190 --- clang/include/clang/AST/DeclCXX.h | 4 +++- clang/include/clang/AST/TextNodeDumper.h | 2 ++ clang/include/clang/Serialization/ASTReader.h | 8 +++++++ clang/lib/AST/TextNodeDumper.cpp | 11 ++++++++++ clang/lib/Serialization/ASTReaderDecl.cpp | 22 +++++++++++++++++++ .../merge-lifetime-extended-temporary/a.h | 2 ++ .../merge-lifetime-extended-temporary/b.h | 4 ++++ .../merge-lifetime-extended-temporary/c.h | 4 ++++ .../module.modulemap | 14 ++++++++++++ .../merge-lifetime-extended-temporary.cpp | 14 ++++++++++++ 10 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h create mode 100644 clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h create mode 100644 clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h create mode 100644 clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap create mode 100644 clang/test/Modules/merge-lifetime-extended-temporary.cpp diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h index 63d67bd3f55b2..0f2018fb9e8cb 100644 --- a/clang/include/clang/AST/DeclCXX.h +++ b/clang/include/clang/AST/DeclCXX.h @@ -3041,7 +3041,9 @@ class NamespaceAliasDecl : public NamedDecl, /// Implicit declaration of a temporary that was materialized by /// a MaterializeTemporaryExpr and lifetime-extended by a 
declaration -class LifetimeExtendedTemporaryDecl final : public Decl { +class LifetimeExtendedTemporaryDecl final + : public Decl, + public Mergeable { friend class MaterializeTemporaryExpr; friend class ASTDeclReader; diff --git a/clang/include/clang/AST/TextNodeDumper.h b/clang/include/clang/AST/TextNodeDumper.h index 0ff5a614a864d..d293ea190aa43 100644 --- a/clang/include/clang/AST/TextNodeDumper.h +++ b/clang/include/clang/AST/TextNodeDumper.h @@ -346,6 +346,8 @@ class TextNodeDumper void VisitObjCPropertyImplDecl(const ObjCPropertyImplDecl *D); void VisitBlockDecl(const BlockDecl *D); void VisitConceptDecl(const ConceptDecl *D); + void + VisitLifetimeExtendedTemporaryDecl(const LifetimeExtendedTemporaryDecl *D); }; } // namespace clang diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index f0b5e99338232..b6dae68b3413b 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -551,6 +551,14 @@ class ASTReader llvm::DenseMap> AnonymousDeclarationsForMerging; + /// Key used to identify LifetimeExtendedTemporaryDecl for merging, + /// containing the lifetime-extending declaration and the mangling number. + using LETemporaryKey = std::pair; + + /// Map of already deserialiazed temporaries. 
+ llvm::DenseMap + LETemporaryForMerging; + struct FileDeclsInfo { ModuleFile *Mod = nullptr; ArrayRef Decls; diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index 0ff95213118fd..561c76a45cbc2 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -1338,6 +1338,17 @@ void TextNodeDumper::VisitFunctionDecl(const FunctionDecl *D) { OS << " <<getNumParams() << ">>>"; } +void TextNodeDumper::VisitLifetimeExtendedTemporaryDecl( + const LifetimeExtendedTemporaryDecl *D) { + OS << " extended by "; + dumpBareDeclRef(D->getExtendingDecl()); + OS << " mangling "; + { + ColorScope Color(OS, ShowColors, ValueColor); + OS << D->getManglingNumber(); + } +} + void TextNodeDumper::VisitFieldDecl(const FieldDecl *D) { dumpName(D); dumpType(D->getType()); diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 8991a39a70679..3f7a1ed7fd5c2 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -424,6 +424,8 @@ namespace clang { template void mergeMergeable(Mergeable *D); + void mergeMergeable(LifetimeExtendedTemporaryDecl *D); + void mergeTemplatePattern(RedeclarableTemplateDecl *D, RedeclarableTemplateDecl *Existing, DeclID DsID, bool IsKeyDecl); @@ -2358,6 +2360,7 @@ void ASTDeclReader::VisitLifetimeExtendedTemporaryDecl( if (Record.readInt()) D->Value = new (D->getASTContext()) APValue(Record.readAPValue()); D->ManglingNumber = Record.readInt(); + mergeMergeable(D); } std::pair @@ -2555,6 +2558,25 @@ static bool allowODRLikeMergeInC(NamedDecl *ND) { return false; } +/// Attempts to merge LifetimeExtendedTemporaryDecl with +/// identical class definitions from two different modules. +void ASTDeclReader::mergeMergeable(LifetimeExtendedTemporaryDecl *D) { + // If modules are not available, there is no reason to perform this merge. 
+ if (!Reader.getContext().getLangOpts().Modules) + return; + + LifetimeExtendedTemporaryDecl *LETDecl = D; + + LifetimeExtendedTemporaryDecl *&LookupResult = + Reader.LETemporaryForMerging[std::make_pair( + LETDecl->getExtendingDecl(), LETDecl->getManglingNumber())]; + if (LookupResult) + Reader.getContext().setPrimaryMergedDecl(LETDecl, + LookupResult->getCanonicalDecl()); + else + LookupResult = LETDecl; +} + /// Attempts to merge the given declaration (D) with another declaration /// of the same entity, for the case where the entity is not actually /// redeclarable. This happens, for instance, when merging the fields of diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h new file mode 100644 index 0000000000000..8adab29eafc76 --- /dev/null +++ b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h @@ -0,0 +1,2 @@ + +constexpr const int& LETemp = 0; diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h new file mode 100644 index 0000000000000..2bd1b096d6073 --- /dev/null +++ b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h @@ -0,0 +1,4 @@ + +#include "a.h" + +constexpr const int* PtrTemp1 = &LETemp; diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h new file mode 100644 index 0000000000000..b023eebca49c2 --- /dev/null +++ b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h @@ -0,0 +1,4 @@ + +#include "a.h" + +constexpr const int* PtrTemp2 = &LETemp; diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap new file mode 100644 index 0000000000000..1339d627a44af --- /dev/null +++ 
b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap @@ -0,0 +1,14 @@ +module "a" { + export * + header "a.h" +} + +module "b" { + export * + header "b.h" +} + +module "c" { + export * + header "c.h" +} diff --git a/clang/test/Modules/merge-lifetime-extended-temporary.cpp b/clang/test/Modules/merge-lifetime-extended-temporary.cpp new file mode 100644 index 0000000000000..36db948b2c4ef --- /dev/null +++ b/clang/test/Modules/merge-lifetime-extended-temporary.cpp @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x c++ -I%S/Inputs/merge-lifetime-extended-temporary -verify -std=c++11 %s -DORDER=1 +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x c++ -I%S/Inputs/merge-lifetime-extended-temporary -verify -std=c++11 %s -DORDER=2 + +// expected-no-diagnostics +#if ORDER == 1 +#include "c.h" +#include "b.h" +#else +#include "b.h" +#include "c.h" +#endif + +static_assert(PtrTemp1 == &LETemp, ""); +static_assert(PtrTemp1 == PtrTemp2, ""); From 486d1a535896aa4f48f0ecaf451ea35dbd4f137b Mon Sep 17 00:00:00 2001 From: Tyker Date: Sun, 1 Dec 2019 11:58:14 +0100 Subject: [PATCH 246/591] Revert "[clang][modules] Add support for merging lifetime-extended temporaries" This reverts commit 85c74384778909789389b9012a75cfcca7964a28. 
--- clang/include/clang/AST/DeclCXX.h | 4 +--- clang/include/clang/AST/TextNodeDumper.h | 2 -- clang/include/clang/Serialization/ASTReader.h | 8 ------- clang/lib/AST/TextNodeDumper.cpp | 11 ---------- clang/lib/Serialization/ASTReaderDecl.cpp | 22 ------------------- .../merge-lifetime-extended-temporary/a.h | 2 -- .../merge-lifetime-extended-temporary/b.h | 4 ---- .../merge-lifetime-extended-temporary/c.h | 4 ---- .../module.modulemap | 14 ------------ .../merge-lifetime-extended-temporary.cpp | 14 ------------ 10 files changed, 1 insertion(+), 84 deletions(-) delete mode 100644 clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h delete mode 100644 clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h delete mode 100644 clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h delete mode 100644 clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap delete mode 100644 clang/test/Modules/merge-lifetime-extended-temporary.cpp diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h index 0f2018fb9e8cb..63d67bd3f55b2 100644 --- a/clang/include/clang/AST/DeclCXX.h +++ b/clang/include/clang/AST/DeclCXX.h @@ -3041,9 +3041,7 @@ class NamespaceAliasDecl : public NamedDecl, /// Implicit declaration of a temporary that was materialized by /// a MaterializeTemporaryExpr and lifetime-extended by a declaration -class LifetimeExtendedTemporaryDecl final - : public Decl, - public Mergeable { +class LifetimeExtendedTemporaryDecl final : public Decl { friend class MaterializeTemporaryExpr; friend class ASTDeclReader; diff --git a/clang/include/clang/AST/TextNodeDumper.h b/clang/include/clang/AST/TextNodeDumper.h index d293ea190aa43..0ff5a614a864d 100644 --- a/clang/include/clang/AST/TextNodeDumper.h +++ b/clang/include/clang/AST/TextNodeDumper.h @@ -346,8 +346,6 @@ class TextNodeDumper void VisitObjCPropertyImplDecl(const ObjCPropertyImplDecl *D); void VisitBlockDecl(const BlockDecl *D); void 
VisitConceptDecl(const ConceptDecl *D); - void - VisitLifetimeExtendedTemporaryDecl(const LifetimeExtendedTemporaryDecl *D); }; } // namespace clang diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index b6dae68b3413b..f0b5e99338232 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -551,14 +551,6 @@ class ASTReader llvm::DenseMap> AnonymousDeclarationsForMerging; - /// Key used to identify LifetimeExtendedTemporaryDecl for merging, - /// containing the lifetime-extending declaration and the mangling number. - using LETemporaryKey = std::pair; - - /// Map of already deserialiazed temporaries. - llvm::DenseMap - LETemporaryForMerging; - struct FileDeclsInfo { ModuleFile *Mod = nullptr; ArrayRef Decls; diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index 561c76a45cbc2..0ff95213118fd 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -1338,17 +1338,6 @@ void TextNodeDumper::VisitFunctionDecl(const FunctionDecl *D) { OS << " <<getNumParams() << ">>>"; } -void TextNodeDumper::VisitLifetimeExtendedTemporaryDecl( - const LifetimeExtendedTemporaryDecl *D) { - OS << " extended by "; - dumpBareDeclRef(D->getExtendingDecl()); - OS << " mangling "; - { - ColorScope Color(OS, ShowColors, ValueColor); - OS << D->getManglingNumber(); - } -} - void TextNodeDumper::VisitFieldDecl(const FieldDecl *D) { dumpName(D); dumpType(D->getType()); diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 3f7a1ed7fd5c2..8991a39a70679 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -424,8 +424,6 @@ namespace clang { template void mergeMergeable(Mergeable *D); - void mergeMergeable(LifetimeExtendedTemporaryDecl *D); - void mergeTemplatePattern(RedeclarableTemplateDecl *D, RedeclarableTemplateDecl 
*Existing, DeclID DsID, bool IsKeyDecl); @@ -2360,7 +2358,6 @@ void ASTDeclReader::VisitLifetimeExtendedTemporaryDecl( if (Record.readInt()) D->Value = new (D->getASTContext()) APValue(Record.readAPValue()); D->ManglingNumber = Record.readInt(); - mergeMergeable(D); } std::pair @@ -2558,25 +2555,6 @@ static bool allowODRLikeMergeInC(NamedDecl *ND) { return false; } -/// Attempts to merge LifetimeExtendedTemporaryDecl with -/// identical class definitions from two different modules. -void ASTDeclReader::mergeMergeable(LifetimeExtendedTemporaryDecl *D) { - // If modules are not available, there is no reason to perform this merge. - if (!Reader.getContext().getLangOpts().Modules) - return; - - LifetimeExtendedTemporaryDecl *LETDecl = D; - - LifetimeExtendedTemporaryDecl *&LookupResult = - Reader.LETemporaryForMerging[std::make_pair( - LETDecl->getExtendingDecl(), LETDecl->getManglingNumber())]; - if (LookupResult) - Reader.getContext().setPrimaryMergedDecl(LETDecl, - LookupResult->getCanonicalDecl()); - else - LookupResult = LETDecl; -} - /// Attempts to merge the given declaration (D) with another declaration /// of the same entity, for the case where the entity is not actually /// redeclarable. 
This happens, for instance, when merging the fields of diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h deleted file mode 100644 index 8adab29eafc76..0000000000000 --- a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h +++ /dev/null @@ -1,2 +0,0 @@ - -constexpr const int& LETemp = 0; diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h deleted file mode 100644 index 2bd1b096d6073..0000000000000 --- a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h +++ /dev/null @@ -1,4 +0,0 @@ - -#include "a.h" - -constexpr const int* PtrTemp1 = &LETemp; diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h deleted file mode 100644 index b023eebca49c2..0000000000000 --- a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h +++ /dev/null @@ -1,4 +0,0 @@ - -#include "a.h" - -constexpr const int* PtrTemp2 = &LETemp; diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap deleted file mode 100644 index 1339d627a44af..0000000000000 --- a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap +++ /dev/null @@ -1,14 +0,0 @@ -module "a" { - export * - header "a.h" -} - -module "b" { - export * - header "b.h" -} - -module "c" { - export * - header "c.h" -} diff --git a/clang/test/Modules/merge-lifetime-extended-temporary.cpp b/clang/test/Modules/merge-lifetime-extended-temporary.cpp deleted file mode 100644 index 36db948b2c4ef..0000000000000 --- a/clang/test/Modules/merge-lifetime-extended-temporary.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x c++ 
-I%S/Inputs/merge-lifetime-extended-temporary -verify -std=c++11 %s -DORDER=1 -// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x c++ -I%S/Inputs/merge-lifetime-extended-temporary -verify -std=c++11 %s -DORDER=2 - -// expected-no-diagnostics -#if ORDER == 1 -#include "c.h" -#include "b.h" -#else -#include "b.h" -#include "c.h" -#endif - -static_assert(PtrTemp1 == &LETemp, ""); -static_assert(PtrTemp1 == PtrTemp2, ""); From 89c47313c9b11c9f7b6ee1b6cbc7789fdb5e75ea Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Sun, 1 Dec 2019 15:16:31 +0000 Subject: [PATCH 247/591] remove UB from test by making GV alignment explicit --- llvm/test/Transforms/GlobalOpt/atomic.ll | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/Transforms/GlobalOpt/atomic.ll b/llvm/test/Transforms/GlobalOpt/atomic.ll index 7597e0f03ba3e..f49ab52701b5c 100644 --- a/llvm/test/Transforms/GlobalOpt/atomic.ll +++ b/llvm/test/Transforms/GlobalOpt/atomic.ll @@ -1,10 +1,10 @@ ; RUN: opt -globalopt < %s -S -o - | FileCheck %s -@GV1 = internal global i64 1 -@GV2 = internal global i32 0 +@GV1 = internal global i64 1, align 8 +@GV2 = internal global i32 0, align 4 -; CHECK: @GV1 = internal unnamed_addr global i64 1 -; CHECK: @GV2 = internal unnamed_addr global i32 0 +; CHECK: @GV1 = internal unnamed_addr global i64 1, align 8 +; CHECK: @GV2 = internal unnamed_addr global i32 0, align 4 define void @test1() { entry: From 35bc5276ca31e3f0e8e87322153f410fa6224e59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Sat, 30 Nov 2019 15:13:56 +0100 Subject: [PATCH 248/591] [libunwind] Emit dependent libraries only when detected by CMake 996e62eef75 added Linux-specific dependent libraries to libunwind sources. As a result, building libunwind with modern LLD on *BSD started failing due to trying to link libdl. Instead, add those libraries only if they were detected by CMake. 
While technically we could create a long list of systems that need -ldl and -lpthread, maintaining a duplicate list makes little sense when CMake needs to detect it for non-LLD systems anyway. Remove existing system exceptions since they should be covered by the CMake check anyway. Remove -D_LIBUNWIND_HAS_COMMENT_LIB_PRAGMA since it is no longer explicitly needed, if we make the library-specific defines dependent on presence of this pragma support. Differential Revision: https://reviews.llvm.org/D70868 --- libunwind/CMakeLists.txt | 7 ++++++- libunwind/src/AddressSpace.hpp | 2 +- libunwind/src/RWMutex.hpp | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt index 25dc95cf6ba76..08095d1333a56 100644 --- a/libunwind/CMakeLists.txt +++ b/libunwind/CMakeLists.txt @@ -352,7 +352,12 @@ if (WIN32 AND LIBUNWIND_ENABLE_STATIC AND NOT LIBUNWIND_ENABLE_SHARED) endif() if (LIBUNWIND_HAS_COMMENT_LIB_PRAGMA) - add_definitions(-D_LIBUNWIND_HAS_COMMENT_LIB_PRAGMA) + if (LIBUNWIND_HAS_DL_LIB) + add_definitions(-D_LIBUNWIND_LINK_DL_LIB) + endif() + if (LIBUNWIND_HAS_PTHREAD_LIB) + add_definitions(-D_LIBUNWIND_LINK_PTHREAD_LIB) + endif() endif() #=============================================================================== diff --git a/libunwind/src/AddressSpace.hpp b/libunwind/src/AddressSpace.hpp index db67df4dc80ac..7433476f91172 100644 --- a/libunwind/src/AddressSpace.hpp +++ b/libunwind/src/AddressSpace.hpp @@ -27,7 +27,7 @@ #if _LIBUNWIND_USE_DLADDR #include -#if defined(__unix__) && defined(__ELF__) && defined(_LIBUNWIND_HAS_COMMENT_LIB_PRAGMA) +#if defined(__ELF__) && defined(_LIBUNWIND_LINK_DL_LIB) #pragma comment(lib, "dl") #endif #endif diff --git a/libunwind/src/RWMutex.hpp b/libunwind/src/RWMutex.hpp index 954e94c322d45..fcd3f4967d17f 100644 --- a/libunwind/src/RWMutex.hpp +++ b/libunwind/src/RWMutex.hpp @@ -17,7 +17,7 @@ #include #elif !defined(_LIBUNWIND_HAS_NO_THREADS) #include -#if 
defined(__unix__) && !defined(__ANDROID__) && defined(__ELF__) && defined(_LIBUNWIND_HAS_COMMENT_LIB_PRAGMA) +#if defined(__ELF__) && defined(_LIBUNWIND_LINK_PTHREAD_LIB) #pragma comment(lib, "pthread") #endif #endif From 3dd93dc2a1aeedcb49e9650124eff07ee3e2b881 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 30 Nov 2019 21:47:41 -0800 Subject: [PATCH 249/591] [X86][InstCombine] Move instcombine test from test/CodeGen/X86 to test/Transforms/InstCombine/ and replace grep with FileCheck --- .../InstCombine}/X86/2009-03-23-i80-fp80.ll | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) rename llvm/test/{CodeGen => Transforms/InstCombine}/X86/2009-03-23-i80-fp80.ll (58%) diff --git a/llvm/test/CodeGen/X86/2009-03-23-i80-fp80.ll b/llvm/test/Transforms/InstCombine/X86/2009-03-23-i80-fp80.ll similarity index 58% rename from llvm/test/CodeGen/X86/2009-03-23-i80-fp80.ll rename to llvm/test/Transforms/InstCombine/X86/2009-03-23-i80-fp80.ll index e542325b63697..f093c435a879e 100644 --- a/llvm/test/CodeGen/X86/2009-03-23-i80-fp80.ll +++ b/llvm/test/Transforms/InstCombine/X86/2009-03-23-i80-fp80.ll @@ -1,14 +1,20 @@ -; RUN: opt < %s -instcombine -S | grep 302245289961712575840256 -; RUN: opt < %s -instcombine -S | grep K40018000000000000000 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i686-apple-darwin9" define i80 @from() { +; CHECK-LABEL: @from( +; CHECK-NEXT: ret i80 302245289961712575840256 +; %tmp = bitcast x86_fp80 0xK4000C000000000000000 to i80 ret i80 %tmp } define x86_fp80 @to() { +; CHECK-LABEL: @to( +; CHECK-NEXT: ret x86_fp80 0xK40018000000000000000 +; %tmp = bitcast i80 302259125019767858003968 to x86_fp80 ret x86_fp80 %tmp } From 67298d683ca18fa8855349b5b0c289f92c93b6b1 Mon Sep 17 00:00:00 2001 From: Craig Topper 
Date: Sat, 30 Nov 2019 21:53:28 -0800 Subject: [PATCH 250/591] [X86][InstCombine] Move non-X86 specific instcombine test from test/CodeGen/X86/ to test/Transforms/InstCombine/ --- .../{CodeGen/X86 => Transforms/InstCombine}/vec_udiv_to_shift.ll | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename llvm/test/{CodeGen/X86 => Transforms/InstCombine}/vec_udiv_to_shift.ll (100%) diff --git a/llvm/test/CodeGen/X86/vec_udiv_to_shift.ll b/llvm/test/Transforms/InstCombine/vec_udiv_to_shift.ll similarity index 100% rename from llvm/test/CodeGen/X86/vec_udiv_to_shift.ll rename to llvm/test/Transforms/InstCombine/vec_udiv_to_shift.ll From a3cbe1a202df6ec8e23bd55e14db254e4bc33021 Mon Sep 17 00:00:00 2001 From: Tyker Date: Sat, 30 Nov 2019 16:42:33 +0100 Subject: [PATCH 251/591] [clang][modules] Add support for merging lifetime-extended temporaries Summary: Add support for merging lifetime-extended temporaries Reviewers: rsmith Reviewed By: rsmith Subscribers: xbolva00, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D70190 --- clang/include/clang/AST/DeclCXX.h | 4 +++- clang/include/clang/AST/TextNodeDumper.h | 2 ++ clang/include/clang/Serialization/ASTReader.h | 8 +++++++ clang/lib/AST/TextNodeDumper.cpp | 11 ++++++++++ clang/lib/Serialization/ASTReaderDecl.cpp | 22 +++++++++++++++++++ .../merge-lifetime-extended-temporary/a.h | 2 ++ .../merge-lifetime-extended-temporary/b.h | 4 ++++ .../merge-lifetime-extended-temporary/c.h | 4 ++++ .../module.modulemap | 14 ++++++++++++ .../merge-lifetime-extended-temporary.cpp | 14 ++++++++++++ 10 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h create mode 100644 clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h create mode 100644 clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h create mode 100644 clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap create mode 100644 
clang/test/Modules/merge-lifetime-extended-temporary.cpp diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h index 63d67bd3f55b2..0f2018fb9e8cb 100644 --- a/clang/include/clang/AST/DeclCXX.h +++ b/clang/include/clang/AST/DeclCXX.h @@ -3041,7 +3041,9 @@ class NamespaceAliasDecl : public NamedDecl, /// Implicit declaration of a temporary that was materialized by /// a MaterializeTemporaryExpr and lifetime-extended by a declaration -class LifetimeExtendedTemporaryDecl final : public Decl { +class LifetimeExtendedTemporaryDecl final + : public Decl, + public Mergeable { friend class MaterializeTemporaryExpr; friend class ASTDeclReader; diff --git a/clang/include/clang/AST/TextNodeDumper.h b/clang/include/clang/AST/TextNodeDumper.h index 0ff5a614a864d..d293ea190aa43 100644 --- a/clang/include/clang/AST/TextNodeDumper.h +++ b/clang/include/clang/AST/TextNodeDumper.h @@ -346,6 +346,8 @@ class TextNodeDumper void VisitObjCPropertyImplDecl(const ObjCPropertyImplDecl *D); void VisitBlockDecl(const BlockDecl *D); void VisitConceptDecl(const ConceptDecl *D); + void + VisitLifetimeExtendedTemporaryDecl(const LifetimeExtendedTemporaryDecl *D); }; } // namespace clang diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index f0b5e99338232..b6dae68b3413b 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -551,6 +551,14 @@ class ASTReader llvm::DenseMap> AnonymousDeclarationsForMerging; + /// Key used to identify LifetimeExtendedTemporaryDecl for merging, + /// containing the lifetime-extending declaration and the mangling number. + using LETemporaryKey = std::pair; + + /// Map of already deserialiazed temporaries. 
+ llvm::DenseMap + LETemporaryForMerging; + struct FileDeclsInfo { ModuleFile *Mod = nullptr; ArrayRef Decls; diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index 0ff95213118fd..561c76a45cbc2 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -1338,6 +1338,17 @@ void TextNodeDumper::VisitFunctionDecl(const FunctionDecl *D) { OS << " <<getNumParams() << ">>>"; } +void TextNodeDumper::VisitLifetimeExtendedTemporaryDecl( + const LifetimeExtendedTemporaryDecl *D) { + OS << " extended by "; + dumpBareDeclRef(D->getExtendingDecl()); + OS << " mangling "; + { + ColorScope Color(OS, ShowColors, ValueColor); + OS << D->getManglingNumber(); + } +} + void TextNodeDumper::VisitFieldDecl(const FieldDecl *D) { dumpName(D); dumpType(D->getType()); diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 8991a39a70679..3f7a1ed7fd5c2 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -424,6 +424,8 @@ namespace clang { template void mergeMergeable(Mergeable *D); + void mergeMergeable(LifetimeExtendedTemporaryDecl *D); + void mergeTemplatePattern(RedeclarableTemplateDecl *D, RedeclarableTemplateDecl *Existing, DeclID DsID, bool IsKeyDecl); @@ -2358,6 +2360,7 @@ void ASTDeclReader::VisitLifetimeExtendedTemporaryDecl( if (Record.readInt()) D->Value = new (D->getASTContext()) APValue(Record.readAPValue()); D->ManglingNumber = Record.readInt(); + mergeMergeable(D); } std::pair @@ -2555,6 +2558,25 @@ static bool allowODRLikeMergeInC(NamedDecl *ND) { return false; } +/// Attempts to merge LifetimeExtendedTemporaryDecl with +/// identical class definitions from two different modules. +void ASTDeclReader::mergeMergeable(LifetimeExtendedTemporaryDecl *D) { + // If modules are not available, there is no reason to perform this merge. 
+ if (!Reader.getContext().getLangOpts().Modules) + return; + + LifetimeExtendedTemporaryDecl *LETDecl = D; + + LifetimeExtendedTemporaryDecl *&LookupResult = + Reader.LETemporaryForMerging[std::make_pair( + LETDecl->getExtendingDecl(), LETDecl->getManglingNumber())]; + if (LookupResult) + Reader.getContext().setPrimaryMergedDecl(LETDecl, + LookupResult->getCanonicalDecl()); + else + LookupResult = LETDecl; +} + /// Attempts to merge the given declaration (D) with another declaration /// of the same entity, for the case where the entity is not actually /// redeclarable. This happens, for instance, when merging the fields of diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h new file mode 100644 index 0000000000000..8adab29eafc76 --- /dev/null +++ b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h @@ -0,0 +1,2 @@ + +constexpr const int& LETemp = 0; diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h new file mode 100644 index 0000000000000..2bd1b096d6073 --- /dev/null +++ b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h @@ -0,0 +1,4 @@ + +#include "a.h" + +constexpr const int* PtrTemp1 = &LETemp; diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h new file mode 100644 index 0000000000000..b023eebca49c2 --- /dev/null +++ b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h @@ -0,0 +1,4 @@ + +#include "a.h" + +constexpr const int* PtrTemp2 = &LETemp; diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap new file mode 100644 index 0000000000000..1339d627a44af --- /dev/null +++ 
b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap @@ -0,0 +1,14 @@ +module "a" { + export * + header "a.h" +} + +module "b" { + export * + header "b.h" +} + +module "c" { + export * + header "c.h" +} diff --git a/clang/test/Modules/merge-lifetime-extended-temporary.cpp b/clang/test/Modules/merge-lifetime-extended-temporary.cpp new file mode 100644 index 0000000000000..36db948b2c4ef --- /dev/null +++ b/clang/test/Modules/merge-lifetime-extended-temporary.cpp @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x c++ -I%S/Inputs/merge-lifetime-extended-temporary -verify -std=c++11 %s -DORDER=1 +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x c++ -I%S/Inputs/merge-lifetime-extended-temporary -verify -std=c++11 %s -DORDER=2 + +// expected-no-diagnostics +#if ORDER == 1 +#include "c.h" +#include "b.h" +#else +#include "b.h" +#include "c.h" +#endif + +static_assert(PtrTemp1 == &LETemp, ""); +static_assert(PtrTemp1 == PtrTemp2, ""); From ae5484540f15bcbcb0de9558e66b0217ab8473ed Mon Sep 17 00:00:00 2001 From: Tyker Date: Sun, 1 Dec 2019 22:38:31 +0100 Subject: [PATCH 252/591] Revert "[clang][modules] Add support for merging lifetime-extended temporaries" This reverts commit a3cbe1a202df6ec8e23bd55e14db254e4bc33021. 
--- clang/include/clang/AST/DeclCXX.h | 4 +--- clang/include/clang/AST/TextNodeDumper.h | 2 -- clang/include/clang/Serialization/ASTReader.h | 8 ------- clang/lib/AST/TextNodeDumper.cpp | 11 ---------- clang/lib/Serialization/ASTReaderDecl.cpp | 22 ------------------- .../merge-lifetime-extended-temporary/a.h | 2 -- .../merge-lifetime-extended-temporary/b.h | 4 ---- .../merge-lifetime-extended-temporary/c.h | 4 ---- .../module.modulemap | 14 ------------ .../merge-lifetime-extended-temporary.cpp | 14 ------------ 10 files changed, 1 insertion(+), 84 deletions(-) delete mode 100644 clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h delete mode 100644 clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h delete mode 100644 clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h delete mode 100644 clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap delete mode 100644 clang/test/Modules/merge-lifetime-extended-temporary.cpp diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h index 0f2018fb9e8cb..63d67bd3f55b2 100644 --- a/clang/include/clang/AST/DeclCXX.h +++ b/clang/include/clang/AST/DeclCXX.h @@ -3041,9 +3041,7 @@ class NamespaceAliasDecl : public NamedDecl, /// Implicit declaration of a temporary that was materialized by /// a MaterializeTemporaryExpr and lifetime-extended by a declaration -class LifetimeExtendedTemporaryDecl final - : public Decl, - public Mergeable { +class LifetimeExtendedTemporaryDecl final : public Decl { friend class MaterializeTemporaryExpr; friend class ASTDeclReader; diff --git a/clang/include/clang/AST/TextNodeDumper.h b/clang/include/clang/AST/TextNodeDumper.h index d293ea190aa43..0ff5a614a864d 100644 --- a/clang/include/clang/AST/TextNodeDumper.h +++ b/clang/include/clang/AST/TextNodeDumper.h @@ -346,8 +346,6 @@ class TextNodeDumper void VisitObjCPropertyImplDecl(const ObjCPropertyImplDecl *D); void VisitBlockDecl(const BlockDecl *D); void 
VisitConceptDecl(const ConceptDecl *D); - void - VisitLifetimeExtendedTemporaryDecl(const LifetimeExtendedTemporaryDecl *D); }; } // namespace clang diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index b6dae68b3413b..f0b5e99338232 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -551,14 +551,6 @@ class ASTReader llvm::DenseMap> AnonymousDeclarationsForMerging; - /// Key used to identify LifetimeExtendedTemporaryDecl for merging, - /// containing the lifetime-extending declaration and the mangling number. - using LETemporaryKey = std::pair; - - /// Map of already deserialiazed temporaries. - llvm::DenseMap - LETemporaryForMerging; - struct FileDeclsInfo { ModuleFile *Mod = nullptr; ArrayRef Decls; diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index 561c76a45cbc2..0ff95213118fd 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -1338,17 +1338,6 @@ void TextNodeDumper::VisitFunctionDecl(const FunctionDecl *D) { OS << " <<getNumParams() << ">>>"; } -void TextNodeDumper::VisitLifetimeExtendedTemporaryDecl( - const LifetimeExtendedTemporaryDecl *D) { - OS << " extended by "; - dumpBareDeclRef(D->getExtendingDecl()); - OS << " mangling "; - { - ColorScope Color(OS, ShowColors, ValueColor); - OS << D->getManglingNumber(); - } -} - void TextNodeDumper::VisitFieldDecl(const FieldDecl *D) { dumpName(D); dumpType(D->getType()); diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 3f7a1ed7fd5c2..8991a39a70679 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -424,8 +424,6 @@ namespace clang { template void mergeMergeable(Mergeable *D); - void mergeMergeable(LifetimeExtendedTemporaryDecl *D); - void mergeTemplatePattern(RedeclarableTemplateDecl *D, RedeclarableTemplateDecl 
*Existing, DeclID DsID, bool IsKeyDecl); @@ -2360,7 +2358,6 @@ void ASTDeclReader::VisitLifetimeExtendedTemporaryDecl( if (Record.readInt()) D->Value = new (D->getASTContext()) APValue(Record.readAPValue()); D->ManglingNumber = Record.readInt(); - mergeMergeable(D); } std::pair @@ -2558,25 +2555,6 @@ static bool allowODRLikeMergeInC(NamedDecl *ND) { return false; } -/// Attempts to merge LifetimeExtendedTemporaryDecl with -/// identical class definitions from two different modules. -void ASTDeclReader::mergeMergeable(LifetimeExtendedTemporaryDecl *D) { - // If modules are not available, there is no reason to perform this merge. - if (!Reader.getContext().getLangOpts().Modules) - return; - - LifetimeExtendedTemporaryDecl *LETDecl = D; - - LifetimeExtendedTemporaryDecl *&LookupResult = - Reader.LETemporaryForMerging[std::make_pair( - LETDecl->getExtendingDecl(), LETDecl->getManglingNumber())]; - if (LookupResult) - Reader.getContext().setPrimaryMergedDecl(LETDecl, - LookupResult->getCanonicalDecl()); - else - LookupResult = LETDecl; -} - /// Attempts to merge the given declaration (D) with another declaration /// of the same entity, for the case where the entity is not actually /// redeclarable. 
This happens, for instance, when merging the fields of diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h deleted file mode 100644 index 8adab29eafc76..0000000000000 --- a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/a.h +++ /dev/null @@ -1,2 +0,0 @@ - -constexpr const int& LETemp = 0; diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h deleted file mode 100644 index 2bd1b096d6073..0000000000000 --- a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/b.h +++ /dev/null @@ -1,4 +0,0 @@ - -#include "a.h" - -constexpr const int* PtrTemp1 = &LETemp; diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h deleted file mode 100644 index b023eebca49c2..0000000000000 --- a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/c.h +++ /dev/null @@ -1,4 +0,0 @@ - -#include "a.h" - -constexpr const int* PtrTemp2 = &LETemp; diff --git a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap b/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap deleted file mode 100644 index 1339d627a44af..0000000000000 --- a/clang/test/Modules/Inputs/merge-lifetime-extended-temporary/module.modulemap +++ /dev/null @@ -1,14 +0,0 @@ -module "a" { - export * - header "a.h" -} - -module "b" { - export * - header "b.h" -} - -module "c" { - export * - header "c.h" -} diff --git a/clang/test/Modules/merge-lifetime-extended-temporary.cpp b/clang/test/Modules/merge-lifetime-extended-temporary.cpp deleted file mode 100644 index 36db948b2c4ef..0000000000000 --- a/clang/test/Modules/merge-lifetime-extended-temporary.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x c++ 
-I%S/Inputs/merge-lifetime-extended-temporary -verify -std=c++11 %s -DORDER=1 -// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x c++ -I%S/Inputs/merge-lifetime-extended-temporary -verify -std=c++11 %s -DORDER=2 - -// expected-no-diagnostics -#if ORDER == 1 -#include "c.h" -#include "b.h" -#else -#include "b.h" -#include "c.h" -#endif - -static_assert(PtrTemp1 == &LETemp, ""); -static_assert(PtrTemp1 == PtrTemp2, ""); From 19fd8925a4afe6efd248688cce06aceff50efe0c Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 1 Dec 2019 22:19:05 +0000 Subject: [PATCH 253/591] Revert "[Examples] Add IRTransformations directory to examples." This breaks LLVMExports.cmake in some build configurations. PR44197 This reverts commits ceb72d07b004af9c428c4a3c73a98ea97d49a713 7d0b1d77b3d4d47df477519fd1bf099b3df6f899. --- llvm/CMakeLists.txt | 4 - llvm/examples/CMakeLists.txt | 1 - llvm/examples/IRTransforms/CMakeLists.txt | 15 - .../IRTransforms/InitializePasses.cpp | 21 - llvm/examples/IRTransforms/InitializePasses.h | 22 - llvm/examples/IRTransforms/SimplifyCFG.cpp | 414 ------------------ llvm/examples/IRTransforms/SimplifyCFG.h | 24 - .../tut-simplify-cfg-blockaddress.ll | 23 - .../SimplifyCFG/tut-simplify-cfg1.ll | 90 ---- .../tut-simplify-cfg2-dead-block-order.ll | 109 ----- .../SimplifyCFG/tut-simplify-cfg3-phis.ll | 70 --- ...ify-cfg4-multiple-duplicate-cfg-updates.ll | 40 -- ...t-simplify-cfg5-del-phis-for-dead-block.ll | 122 ------ .../tut-simplify-cfg6-dead-self-loop.ll | 25 -- llvm/tools/opt/CMakeLists.txt | 4 - llvm/tools/opt/opt.cpp | 8 - 16 files changed, 992 deletions(-) delete mode 100644 llvm/examples/IRTransforms/CMakeLists.txt delete mode 100644 llvm/examples/IRTransforms/InitializePasses.cpp delete mode 100644 llvm/examples/IRTransforms/InitializePasses.h delete mode 100644 llvm/examples/IRTransforms/SimplifyCFG.cpp delete mode 100644 llvm/examples/IRTransforms/SimplifyCFG.h delete mode 100644 
llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg-blockaddress.ll delete mode 100644 llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg1.ll delete mode 100644 llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg2-dead-block-order.ll delete mode 100644 llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg3-phis.ll delete mode 100644 llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg4-multiple-duplicate-cfg-updates.ll delete mode 100644 llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg5-del-phis-for-dead-block.ll delete mode 100644 llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg6-dead-self-loop.ll diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index b1a51b332ff0d..1479e29b4a3ac 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -530,10 +530,6 @@ option(LLVM_BUILD_EXAMPLES "Build the LLVM example programs. If OFF, just generate build targets." OFF) option(LLVM_INCLUDE_EXAMPLES "Generate build targets for the LLVM examples" ON) -if(LLVM_BUILD_EXAMPLES) - add_definitions(-DBUILD_EXAMPLES) -endif(LLVM_BUILD_EXAMPLES) - option(LLVM_BUILD_TESTS "Build LLVM unit tests. If OFF, just generate build targets." OFF) option(LLVM_INCLUDE_TESTS "Generate build targets for the LLVM unit tests." 
ON) diff --git a/llvm/examples/CMakeLists.txt b/llvm/examples/CMakeLists.txt index 1fbcbf793b2da..ad99d4c7e3127 100644 --- a/llvm/examples/CMakeLists.txt +++ b/llvm/examples/CMakeLists.txt @@ -2,7 +2,6 @@ add_subdirectory(BrainF) add_subdirectory(Fibonacci) add_subdirectory(HowToUseJIT) add_subdirectory(HowToUseLLJIT) -add_subdirectory(IRTransforms) add_subdirectory(LLJITExamples) add_subdirectory(Kaleidoscope) add_subdirectory(ModuleMaker) diff --git a/llvm/examples/IRTransforms/CMakeLists.txt b/llvm/examples/IRTransforms/CMakeLists.txt deleted file mode 100644 index 1c3185eed5ff2..0000000000000 --- a/llvm/examples/IRTransforms/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -set(LLVM_LINK_COMPONENTS - Analysis - Core - Support - ) - -add_llvm_library(ExampleIRTransforms - InitializePasses.cpp - SimplifyCFG.cpp - - ADDITIONAL_HEADER_DIRS - - DEPENDS - intrinsics_gen - ) diff --git a/llvm/examples/IRTransforms/InitializePasses.cpp b/llvm/examples/IRTransforms/InitializePasses.cpp deleted file mode 100644 index 125180715cd41..0000000000000 --- a/llvm/examples/IRTransforms/InitializePasses.cpp +++ /dev/null @@ -1,21 +0,0 @@ -//===-- InitializePasses.cpp ----------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements implements the initialization hook for the example -// transforms. 
-// -//===----------------------------------------------------------------------===// - -#include "InitializePasses.h" -#include "llvm/PassRegistry.h" - -using namespace llvm; - -void initializeExampleIRTransforms(PassRegistry &Registry) { - initializeSimplifyCFGLegacyPassPass(Registry); -} diff --git a/llvm/examples/IRTransforms/InitializePasses.h b/llvm/examples/IRTransforms/InitializePasses.h deleted file mode 100644 index 8b6673d518e63..0000000000000 --- a/llvm/examples/IRTransforms/InitializePasses.h +++ /dev/null @@ -1,22 +0,0 @@ -//===- InitializePasses.h - -------------------------------------*- C++ -*-===// -// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXAMPLES_IRTRANSFORMS_INITIALIZEPASSES__H -#define LLVM_EXAMPLES_IRTRANSFORMS_INITIALIZEPASSES__H - -#include "llvm/IR/PassManager.h" - -namespace llvm { - -void initializeExampleIRTransforms(PassRegistry &Registry); -void initializeSimplifyCFGLegacyPassPass(PassRegistry &Registry); - -} // end namespace llvm - -#endif diff --git a/llvm/examples/IRTransforms/SimplifyCFG.cpp b/llvm/examples/IRTransforms/SimplifyCFG.cpp deleted file mode 100644 index 10658c9f09590..0000000000000 --- a/llvm/examples/IRTransforms/SimplifyCFG.cpp +++ /dev/null @@ -1,414 +0,0 @@ -//===- SimplifyCFG.cpp ----------------------------------------------------===// -// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the control flow graph (CFG) simplifications -// presented as part of the 'Getting Started With LLVM: Basics' tutorial at the -// US LLVM Developers Meeting 2019. It also contains additional material. -// -// The current file contains three different CFG simplifications. There are -// multiple versions of each implementation (e.g. _v1 and _v2), which implement -// additional functionality (e.g. preserving analysis like the DominatorTree) or -// use additional utilities to simplify the code (e.g. LLVM's PatternMatch.h). -// The available simplifications are: -// 1. Trivially Dead block Removal (removeDeadBlocks_v[1,2]). -// This simplifications removes all blocks without predecessors in the CFG -// from a function. -// 2. Conditional Branch Elimination (eliminateCondBranches_v[1,2,3]) -// This simplification replaces conditional branches with constant integer -// conditions with unconditional branches. -// 3. Single Predecessor Block Merging (mergeIntoSinglePredecessor_v[1,2]) -// This simplification merges blocks with a single predecessor into the -// predecessor, if that block has a single successor. -// -// TODOs -// * Hook up pass to the new pass manager. -// * Preserve LoopInfo. -// * Add fixed point iteration to delete all dead blocks -// * Add implementation using reachability to discover dead blocks. 
-//===----------------------------------------------------------------------===// - -#include "SimplifyCFG.h" -#include "InitializePasses.h" -#include "llvm/Analysis/DomTreeUpdater.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/PassManager.h" -#include "llvm/IR/PatternMatch.h" -#include "llvm/InitializePasses.h" -#include "llvm/Support/CommandLine.h" - -using namespace llvm; -using namespace PatternMatch; - -enum TutorialVersion { V1, V2, V3 }; -static cl::opt - Version("tut-simplifycfg-version", cl::desc("Select tutorial version"), - cl::Hidden, cl::ValueOptional, cl::init(V1), - cl::values(clEnumValN(V1, "v1", "version 1"), - clEnumValN(V2, "v2", "version 2"), - clEnumValN(V3, "v3", "version 3"), - // Sentinel value for unspecified option. - clEnumValN(V3, "", ""))); - -#define DEBUG_TYPE "tut-simplifycfg" - -// Remove trivially dead blocks. First version, not preserving the -// DominatorTree. -static bool removeDeadBlocks_v1(Function &F) { - bool Changed = false; - - // Remove trivially dead blocks. - for (BasicBlock &BB : make_early_inc_range(F)) { - // Skip blocks we know to not be trivially dead. We know a block is - // guaranteed to be dead, iff it is neither the entry block nor - // has any predecessors. - if (&F.getEntryBlock() == &BB || !pred_empty(&BB)) - continue; - - // Notify successors of BB that BB is going to be removed. This removes - // incoming values from BB from PHIs in the successors. Note that this will - // not actually remove BB from the predecessor lists of its successors. - for (BasicBlock *Succ : successors(&BB)) - Succ->removePredecessor(&BB); - // TODO: Find a better place to put such small variations. - // Alternatively, we can update the PHI nodes manually: - // for (PHINode &PN : make_early_inc_range(Succ->phis())) - // PN.removeIncomingValue(&BB); - - // Replace all instructions in BB with an undef constant. 
The block is - // unreachable, so the results of the instructions should never get used. - while (!BB.empty()) { - Instruction &I = BB.back(); - I.replaceAllUsesWith(UndefValue::get(I.getType())); - I.eraseFromParent(); - } - - // Finally remove the basic block. - BB.eraseFromParent(); - Changed = true; - } - - return Changed; -} - -// Remove trivially dead blocks. This is the second version and preserves the -// dominator tree. -static bool removeDeadBlocks_v2(Function &F, DominatorTree &DT) { - bool Changed = false; - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); - SmallVector DTUpdates; - - // Remove trivially dead blocks. - for (BasicBlock &BB : make_early_inc_range(F)) { - // Skip blocks we know to not be trivially dead. We know a block is - // guaranteed to be dead, iff it is neither the entry block nor - // has any predecessors. - if (&F.getEntryBlock() == &BB || !pred_empty(&BB)) - continue; - - // Notify successors of BB that BB is going to be removed. This removes - // incoming values from BB from PHIs in the successors. Note that this will - // not actually remove BB from the predecessor lists of its successors. - for (BasicBlock *Succ : successors(&BB)) { - Succ->removePredecessor(&BB); - - // Collect updates that need to be applied to the dominator tree. - DTUpdates.push_back({DominatorTree::Delete, &BB, Succ}); - } - - // Remove BB via the DomTreeUpdater. DomTreeUpdater::deleteBB conveniently - // removes the instructions in BB as well. - DTU.deleteBB(&BB); - Changed = true; - } - - // Apply updates permissively, to remove duplicates. - DTU.applyUpdatesPermissive(DTUpdates); - - return Changed; -} - -// Eliminate branches with constant conditionals. This is the first version, -// which *does not* preserve the dominator tree. -static bool eliminateCondBranches_v1(Function &F) { - bool Changed = false; - - // Eliminate branches with constant conditionals. 
- for (BasicBlock &BB : F) { - // Skip blocks without conditional branches as terminators. - BranchInst *BI = dyn_cast(BB.getTerminator()); - if (!BI || !BI->isConditional()) - continue; - - // Skip blocks with conditional branches without ConstantInt conditions. - ConstantInt *CI = dyn_cast(BI->getCondition()); - if (!CI) - continue; - - // We use the branch condition (CI), to select the successor we remove: - // if CI == 1 (true), we remove the second successor, otherwise the first. - BasicBlock *RemovedSucc = BI->getSuccessor(CI->isOne()); - // Tell RemovedSucc we will remove BB from its predecessors. - RemovedSucc->removePredecessor(&BB); - - // Replace the conditional branch with an unconditional one, by creating - // a new unconditional branch to the selected successor and removing the - // conditional one. - BranchInst::Create(BI->getSuccessor(CI->isZero()), BI); - BI->eraseFromParent(); - Changed = true; - } - - return Changed; -} - -// Eliminate branches with constant conditionals. This is the second -// version, which *does* preserve the dominator tree. -static bool eliminateCondBranches_v2(Function &F, DominatorTree &DT) { - bool Changed = false; - - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); - SmallVector DTUpdates; - // Eliminate branches with constant conditionals. - for (BasicBlock &BB : F) { - // Skip blocks without conditional branches as terminators. - BranchInst *BI = dyn_cast(BB.getTerminator()); - if (!BI || !BI->isConditional()) - continue; - - // Skip blocks with conditional branches without ConstantInt conditions. - ConstantInt *CI = dyn_cast(BI->getCondition()); - if (!CI) - continue; - - // We use the branch condition (CI), to select the successor we remove: - // if CI == 1 (true), we remove the second successor, otherwise the first. - BasicBlock *RemovedSucc = BI->getSuccessor(CI->isOne()); - // Tell RemovedSucc we will remove BB from its predecessors. 
- RemovedSucc->removePredecessor(&BB); - - // Replace the conditional branch with an unconditional one, by creating - // a new unconditional branch to the selected successor and removing the - // conditional one. - BranchInst *NewBranch = - BranchInst::Create(BI->getSuccessor(CI->isZero()), BI); - BI->eraseFromParent(); - - // Delete the edge between BB and RemovedSucc in the DominatorTree, iff - // the conditional branch did not use RemovedSucc as both the true and false - // branches. - if (NewBranch->getSuccessor(0) != RemovedSucc) - DTUpdates.push_back({DominatorTree::Delete, &BB, RemovedSucc}); - Changed = true; - } - - // Apply updates permissively, to remove duplicates. - DTU.applyUpdatesPermissive(DTUpdates); - - return Changed; -} - -// Eliminate branches with constant conditionals. This is the third -// version, which uses PatternMatch.h. -static bool eliminateCondBranches_v3(Function &F, DominatorTree &DT) { - bool Changed = false; - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); - SmallVector DTUpdates; - - // Eliminate branches with constant conditionals. - for (BasicBlock &BB : F) { - ConstantInt *CI = nullptr; - BasicBlock *TakenSucc, *RemovedSucc; - // Check if the terminator is a conditional branch, with constant integer - // condition and also capture the successor blocks as TakenSucc and - // RemovedSucc. - if (!match(BB.getTerminator(), - m_Br(m_ConstantInt(CI), m_BasicBlock(TakenSucc), - m_BasicBlock(RemovedSucc)))) - continue; - - // If the condition is false, swap TakenSucc and RemovedSucc. - if (CI->isZero()) - std::swap(TakenSucc, RemovedSucc); - - // Tell RemovedSucc we will remove BB from its predecessors. - RemovedSucc->removePredecessor(&BB); - - // Replace the conditional branch with an unconditional one, by creating - // a new unconditional branch to the selected successor and removing the - // conditional one. 
- - BranchInst *NewBranch = BranchInst::Create(TakenSucc, BB.getTerminator()); - BB.getTerminator()->eraseFromParent(); - - // Delete the edge between BB and RemovedSucc in the DominatorTree, iff - // the conditional branch did not use RemovedSucc as both the true and false - // branches. - if (NewBranch->getSuccessor(0) != RemovedSucc) - DTUpdates.push_back({DominatorTree::Delete, &BB, RemovedSucc}); - Changed = true; - } - - // Apply updates permissively, to remove duplicates. - DTU.applyUpdatesPermissive(DTUpdates); - return Changed; -} - -// Merge basic blocks into their single predecessor, if their predecessor has a -// single successor. This is the first version and does not preserve the -// DominatorTree. -static bool mergeIntoSinglePredecessor_v1(Function &F) { - bool Changed = false; - - // Merge blocks with single predecessors. - for (BasicBlock &BB : make_early_inc_range(F)) { - BasicBlock *Pred = BB.getSinglePredecessor(); - // Make sure BB has a single predecessor Pred and BB is the single - // successor of Pred. - if (!Pred || Pred->getSingleSuccessor() != &BB) - continue; - - // Do not try to merge self loops. That can happen in dead blocks. - if (Pred == &BB) - continue; - - // Need to replace it before nuking the branch. - BB.replaceAllUsesWith(Pred); - // PHI nodes in BB can only have a single incoming value. Remove them. - for (PHINode &PN : make_early_inc_range(BB.phis())) { - PN.replaceAllUsesWith(PN.getIncomingValue(0)); - PN.eraseFromParent(); - } - // Move all instructions from BB to Pred. - for (Instruction &I : make_early_inc_range(BB)) - I.moveBefore(Pred->getTerminator()); - - // Remove the Pred's terminator (which jumped to BB). BB's terminator - // will become Pred's terminator. - Pred->getTerminator()->eraseFromParent(); - BB.eraseFromParent(); - - Changed = true; - } - - return Changed; -} - -// Merge basic blocks into their single predecessor, if their predecessor has a -// single successor. 
This is the second version and does preserve the -// DominatorTree. -static bool mergeIntoSinglePredecessor_v2(Function &F, DominatorTree &DT) { - bool Changed = false; - DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); - SmallVector DTUpdates; - - // Merge blocks with single predecessors. - for (BasicBlock &BB : make_early_inc_range(F)) { - BasicBlock *Pred = BB.getSinglePredecessor(); - // Make sure BB has a single predecessor Pred and BB is the single - // successor of Pred. - if (!Pred || Pred->getSingleSuccessor() != &BB) - continue; - - // Do not try to merge self loops. That can happen in dead blocks. - if (Pred == &BB) - continue; - - // Tell DTU about the changes to the CFG: All edges from BB to its - // successors get removed and we add edges between Pred and BB's successors. - for (BasicBlock *Succ : successors(&BB)) { - DTUpdates.push_back({DominatorTree::Delete, &BB, Succ}); - DTUpdates.push_back({DominatorTree::Insert, Pred, Succ}); - } - // Also remove the edge between Pred and BB. - DTUpdates.push_back({DominatorTree::Delete, Pred, &BB}); - - // Need to replace it before nuking the branch. - BB.replaceAllUsesWith(Pred); - // PHI nodes in BB can only have a single incoming value. Remove them. - for (PHINode &PN : make_early_inc_range(BB.phis())) { - PN.replaceAllUsesWith(PN.getIncomingValue(0)); - PN.eraseFromParent(); - } - // Move all instructions from BB to Pred. - for (Instruction &I : make_early_inc_range(BB)) - I.moveBefore(Pred->getTerminator()); - - // Remove the Pred's terminator (which jumped to BB). BB's terminator - // will become Pred's terminator. - Pred->getTerminator()->eraseFromParent(); - DTU.deleteBB(&BB); - - Changed = true; - } - - // Apply updates permissively, to remove duplicates. 
- DTU.applyUpdatesPermissive(DTUpdates); - return Changed; -} - -static bool doSimplify_v1(Function &F) { - return eliminateCondBranches_v1(F) & mergeIntoSinglePredecessor_v1(F) & - removeDeadBlocks_v1(F); -} - -static bool doSimplify_v2(Function &F, DominatorTree &DT) { - return eliminateCondBranches_v2(F, DT) & - mergeIntoSinglePredecessor_v2(F, DT) & removeDeadBlocks_v2(F, DT); -} - -static bool doSimplify_v3(Function &F, DominatorTree &DT) { - return eliminateCondBranches_v3(F, DT) & - mergeIntoSinglePredecessor_v2(F, DT) & removeDeadBlocks_v2(F, DT); -} - -namespace { -struct SimplifyCFGLegacyPass : public FunctionPass { - static char ID; - SimplifyCFGLegacyPass() : FunctionPass(ID) { - initializeSimplifyCFGLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - // Version 1 of the implementation does not preserve the dominator tree. - if (Version != V1) - AU.addPreserved(); - - FunctionPass::getAnalysisUsage(AU); - } - - bool runOnFunction(Function &F) override { - if (skipFunction(F)) - return false; - - switch (Version) { - case V1: - return doSimplify_v1(F); - case V2: { - auto &DT = getAnalysis().getDomTree(); - return doSimplify_v2(F, DT); - } - case V3: { - auto &DT = getAnalysis().getDomTree(); - return doSimplify_v3(F, DT); - } - } - - llvm_unreachable("Unsupported version"); - } -}; -} // namespace - -char SimplifyCFGLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(SimplifyCFGLegacyPass, DEBUG_TYPE, - "Tutorial CFG simplification", false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(SimplifyCFGLegacyPass, DEBUG_TYPE, - "Tutorial CFG simplifications", false, false) diff --git a/llvm/examples/IRTransforms/SimplifyCFG.h b/llvm/examples/IRTransforms/SimplifyCFG.h deleted file mode 100644 index 09328afb01d36..0000000000000 --- a/llvm/examples/IRTransforms/SimplifyCFG.h +++ /dev/null @@ -1,24 +0,0 @@ -//===- SimplifyCFG.h - Tutorial SimplifyCFG 
---------------------*- C++ -*-===// -// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXAMPLES_IRTRANSFORMS_SIMPLIFYCFG__H -#define LLVM_EXAMPLES_IRTRANSFORMS_SIMPLIFYCFG__H - -#include "llvm/Pass.h" -#include "llvm/PassRegistry.h" - -namespace llvm { - -FunctionPass *createSimplifyCFGPass(); - -void initializeSimplifyCFGLegacyPassPass(PassRegistry &); - -} // end namespace llvm - -#endif // LLVM_EXAMPLES_IRTRANSFORMS_SIMPLIFYCFG__H diff --git a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg-blockaddress.ll b/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg-blockaddress.ll deleted file mode 100644 index faf60f3acdb30..0000000000000 --- a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg-blockaddress.ll +++ /dev/null @@ -1,23 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v1 -S < %s | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v2 -S < %s | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v3 -S < %s | FileCheck %s - -define i8* @simp1(i32 %x) { -; CHECK-LABEL: @simp1( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 42 -; CHECK-NEXT: [[ADDR:%.*]] = select i1 [[CMP]], i8* inttoptr (i32 1 to i8*), i8* inttoptr (i32 1 to i8*) -; CHECK-NEXT: ret i8* [[ADDR]] -; -entry: - %cmp = icmp slt i32 %x, 42 - %addr = select i1 %cmp, i8* blockaddress(@simp1, %bb1), i8* blockaddress(@simp1, %bb2) - ret i8* %addr - -bb1: - ret i8* null - -bb2: - ret i8* null -} diff --git a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg1.ll b/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg1.ll deleted file mode 100644 
index cb0f82e37573a..0000000000000 --- a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg1.ll +++ /dev/null @@ -1,90 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v1 -S < %s | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v2 -S < %s | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v3 -S < %s | FileCheck %s - -define i32 @simp1() { -; CHECK-LABEL: @simp1( -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i32 10 -; -entry: - br i1 true, label %if.then, label %if.else - -if.then: - ret i32 10 - -if.else: - ret i32 12 -} - -define i32 @simp2() { -; CHECK-LABEL: @simp2( -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i32 200 -; -entry: - br i1 false, label %if.then, label %if.else - -if.then: - ret i32 99 - -if.else: - ret i32 200 -} - -declare void @foo(i64) - -define i64 @merge_into_predecessor(i64 %a, i64 %b) { -; CHECK-LABEL: @merge_into_predecessor( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[R:%.*]] = add i64 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: call void @foo(i64 [[R]]) -; CHECK-NEXT: call void @foo(i64 [[A]]) -; CHECK-NEXT: ret i64 [[R]] -; -entry: - br label %bb.next - -bb.next: - %r = add i64 %a, %b - call void @foo(i64 %r) - call void @foo(i64 %a) - br label %bb.next.next - -bb.next.next: - ret i64 %r -} - -define i64 @merge_into_predecessor_with_phi(i64 %a, i64 %b, i1 %c) { -; CHECK-LABEL: @merge_into_predecessor_with_phi( -; CHECK-NEXT: entry: -; CHECK-NEXT: call void @foo(i64 [[B:%.*]]) -; CHECK-NEXT: [[R:%.*]] = add i64 [[A:%.*]], [[B]] -; CHECK-NEXT: call void @foo(i64 [[R]]) -; CHECK-NEXT: call void @foo(i64 [[A]]) -; CHECK-NEXT: br i1 [[C:%.*]], label [[BB_NEXT_NEXT:%.*]], label [[BB_EXIT:%.*]] -; CHECK: bb.next.next: -; CHECK-NEXT: br label [[BB_EXIT]] -; CHECK: bb.exit: -; CHECK-NEXT: [[RET:%.*]] = phi i64 [ [[R]], [[ENTRY:%.*]] ], [ 10, [[BB_NEXT_NEXT]] ] -; CHECK-NEXT: ret i64 [[RET]] -; -entry: - call void @foo(i64 %b) - br 
label %bb.next - -bb.next: - %r = add i64 %a, %b - call void @foo(i64 %r) - call void @foo(i64 %a) - br i1 %c, label %bb.next.next, label %bb.exit - -bb.next.next: - br label %bb.exit - -bb.exit: - %ret = phi i64 [ %r, %bb.next], [ 10, %bb.next.next] - ret i64 %ret - -} diff --git a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg2-dead-block-order.ll b/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg2-dead-block-order.ll deleted file mode 100644 index 11b70fa526770..0000000000000 --- a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg2-dead-block-order.ll +++ /dev/null @@ -1,109 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v1 -S < %s | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v2 -S < %s | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v3 -S < %s | FileCheck %s - -define i32 @remove_dead_blocks() { -; CHECK-LABEL: @remove_dead_blocks( -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i32 1 -; CHECK-NEXT: } -; -entry: - ret i32 1 - -bb.1: - ret i32 2 - -bb.2: - ret i32 3 -} - -define i32 @simp1() { -; CHECK-LABEL: @simp1( -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i32 1 -; CHECK: bb.1: -; CHECK-NEXT: ret i32 2 -; CHECK-NEXT: } -; -entry: - ret i32 1 - -bb.1: - ret i32 2 - -bb.2: - br i1 undef, label %bb.1, label %bb.3 - -bb.3: - ret i32 3 -} - -define i32 @remove_dead_block_with_phi() { -; CHECK-LABEL: @remove_dead_block_with_phi( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[BB_2:%.*]] -; CHECK: bb.2: -; CHECK-NEXT: ret i32 1 -; CHECK-NEXT: } -; -entry: - br label %bb.2 - -bb.1: - br label %bb.2 - -bb.2: - %rv = phi i32 [ 1, %entry ], [ 2, %bb.1 ] - ret i32 %rv -} - -define i32 @remove_dead_blocks_remaining_uses(i32 %a) { -; CHECK-LABEL: @remove_dead_blocks_remaining_uses( -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i32 1 -; CHECK-NEXT: } -; -entry: - ret i32 1 - -bb.2: - ret i32 %res - -bb.1: - %res = add 
i32 %a, 10 - br label %bb.2 -} - -define i32 @remove_dead_blocks_remaining_uses2(i32 %a, i1 %cond) { -; CHECK-LABEL: @remove_dead_blocks_remaining_uses2( -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i32 1 -; CHECK: bb.2: -; CHECK-NEXT: [[RES2:%.*]] = add i32 undef, 10 -; CHECK-NEXT: [[RES3:%.*]] = mul i32 [[RES2]], undef -; CHECK-NEXT: ret i32 [[RES3]] -; CHECK: bb.3: -; CHECK-NEXT: ret i32 undef -; CHECK-NEXT: } -; -entry: - ret i32 1 - -bb.2: - %res2 = add i32 %res, 10 - %res3 = mul i32 %res2, %res - ret i32 %res3 - -bb.3: - br label %bb.4 - -bb.4: - ret i32 %res - -bb.1: - %res = add i32 %a, 10 - br i1 %cond, label %bb.2, label %bb.3 - br label %bb.2 -} diff --git a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg3-phis.ll b/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg3-phis.ll deleted file mode 100644 index 76db503faeb25..0000000000000 --- a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg3-phis.ll +++ /dev/null @@ -1,70 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v1 -S < %s | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v2 -S < %s | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v3 -S < %s | FileCheck %s - -define i32 @phi_cond_branch_eliminated() { -; CHECK-LABEL: @phi_cond_branch_eliminated( -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i32 20 -; -entry: - br i1 true, label %bb.2, label %bb.3 - -bb.2: - br label %bb.3 - -bb.3: - %ret = phi i32 [ 10, %entry ], [ 20, %bb.2 ] - ret i32 %ret -} - -define i32 @phi_removed() { -; CHECK-LABEL: @phi_removed( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[BB_3:%.*]] -; CHECK: bb.3: -; CHECK-NEXT: ret i32 0 -; -entry: - br i1 false, label %bb.2, label %bb.3 - -bb.2: - %pv = phi i32 [ 10, %entry ] - br label %bb.3 - -bb.3: - ret i32 0 -} - -define i32 @phi_in_dead_region() { -; CHECK-LABEL: @phi_in_dead_region( -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i32 
1 -; -entry: - ret i32 1 - -bb.1: - br i1 true, label %bb.2, label %bb.3 - -bb.2: - br label %bb.3 - -bb.3: - %ret = phi i32 [ 10, %bb.1 ], [ 20, %bb.2 ] - ret i32 %ret -} - -define i32 @phi_in_mergable_blocks() { -; CHECK-LABEL: @phi_in_mergable_blocks( -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i32 10 -; -entry: - br label %bb.1 - -bb.1: - %pv = phi i32 [ 10, %entry ] - ret i32 %pv -} diff --git a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg4-multiple-duplicate-cfg-updates.ll b/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg4-multiple-duplicate-cfg-updates.ll deleted file mode 100644 index 82a0e0dac2369..0000000000000 --- a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg4-multiple-duplicate-cfg-updates.ll +++ /dev/null @@ -1,40 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v1 < %s -S -verify-dom-info | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v2 < %s -S -verify-dom-info | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v3 < %s -S -verify-dom-info | FileCheck %s - -; Check that we do not crash when we remove edges multiple times in -; the DomTreeUpdater. 
-define void @test() { -; CHECK-LABEL: @test( -; CHECK-NEXT: entry: -; CHECK-NEXT: switch i8 undef, label [[IF_THEN_EPIL:%.*]] [ -; CHECK-NEXT: i8 32, label [[FOR_INC_EPIL:%.*]] -; CHECK-NEXT: i8 46, label [[FOR_INC_EPIL]] -; CHECK-NEXT: i8 95, label [[FOR_INC_EPIL]] -; CHECK-NEXT: i8 45, label [[FOR_INC_EPIL]] -; CHECK-NEXT: i8 126, label [[FOR_INC_EPIL]] -; CHECK-NEXT: ] -; CHECK: if.then.epil: -; CHECK-NEXT: unreachable -; CHECK: for.inc.epil: -; CHECK-NEXT: ret void -; -entry: - br label %for.body.epil - -for.body.epil: ; preds = %entry - switch i8 undef, label %if.then.epil [ - i8 32, label %for.inc.epil - i8 46, label %for.inc.epil - i8 95, label %for.inc.epil - i8 45, label %for.inc.epil - i8 126, label %for.inc.epil - ] - -if.then.epil: ; preds = %for.body.epil - unreachable - -for.inc.epil: ; preds = %for.body.epil, %for.body.epil, %for.body.epil, %for.body.epil, %for.body.epil - ret void -} diff --git a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg5-del-phis-for-dead-block.ll b/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg5-del-phis-for-dead-block.ll deleted file mode 100644 index b3edd1aa50584..0000000000000 --- a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg5-del-phis-for-dead-block.ll +++ /dev/null @@ -1,122 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v1 < %s -S -verify-dom-info | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v2 < %s -S -verify-dom-info | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v3 < %s -S -verify-dom-info | FileCheck %s - -define void @test() { -; CHECK-LABEL: @test( -; CHECK-NEXT: entry: -; CHECK-NEXT: switch i32 undef, label [[SW_DEFAULT23:%.*]] [ -; CHECK-NEXT: i32 129, label [[SW_BB:%.*]] -; CHECK-NEXT: i32 215, label [[SW_BB1:%.*]] -; CHECK-NEXT: i32 117, label [[SW_BB1]] -; CHECK-NEXT: i32 207, label [[SW_BB1]] -; CHECK-NEXT: i32 158, label 
[[SW_BB1]] -; CHECK-NEXT: i32 94, label [[SW_BB1]] -; CHECK-NEXT: i32 219, label [[SW_BB1]] -; CHECK-NEXT: i32 88, label [[SW_BB1]] -; CHECK-NEXT: i32 168, label [[SW_BB1]] -; CHECK-NEXT: i32 295, label [[SW_BB1]] -; CHECK-NEXT: i32 294, label [[SW_BB1]] -; CHECK-NEXT: i32 296, label [[SW_BB1]] -; CHECK-NEXT: i32 67, label [[SW_BB1]] -; CHECK-NEXT: i32 293, label [[SW_BB1]] -; CHECK-NEXT: i32 382, label [[SW_BB1]] -; CHECK-NEXT: i32 335, label [[SW_BB1]] -; CHECK-NEXT: i32 393, label [[SW_BB1]] -; CHECK-NEXT: i32 415, label [[SW_BB1]] -; CHECK-NEXT: i32 400, label [[SW_BB1]] -; CHECK-NEXT: i32 383, label [[SW_BB1]] -; CHECK-NEXT: i32 421, label [[SW_BB1]] -; CHECK-NEXT: i32 422, label [[SW_BB1]] -; CHECK-NEXT: i32 302, label [[SW_BB1]] -; CHECK-NEXT: i32 303, label [[SW_BB1]] -; CHECK-NEXT: i32 304, label [[SW_BB1]] -; CHECK-NEXT: i32 420, label [[SW_BB1]] -; CHECK-NEXT: i32 401, label [[SW_EPILOG24:%.*]] -; CHECK-NEXT: i32 53, label [[SW_BB12:%.*]] -; CHECK-NEXT: i32 44, label [[SW_BB12]] -; CHECK-NEXT: ] -; CHECK: sw.bb: -; CHECK-NEXT: unreachable -; CHECK: sw.bb1: -; CHECK-NEXT: br label [[SW_EPILOG24]] -; CHECK: sw.bb12: -; CHECK-NEXT: switch i32 undef, label [[SW_DEFAULT:%.*]] [ -; CHECK-NEXT: i32 47, label [[SW_BB13:%.*]] -; CHECK-NEXT: i32 8, label [[SW_BB13]] -; CHECK-NEXT: ] -; CHECK: sw.bb13: -; CHECK-NEXT: unreachable -; CHECK: sw.default: -; CHECK-NEXT: unreachable -; CHECK: sw.default23: -; CHECK-NEXT: unreachable -; CHECK: sw.epilog24: -; CHECK-NEXT: [[PREVIOUS_3:%.*]] = phi i32 [ undef, [[SW_BB1]] ], [ 401, [[ENTRY:%.*]] ] -; CHECK-NEXT: unreachable -; -entry: - br label %while.body - -while.body: ; preds = %entry - switch i32 undef, label %sw.default23 [ - i32 129, label %sw.bb - i32 215, label %sw.bb1 - i32 117, label %sw.bb1 - i32 207, label %sw.bb1 - i32 158, label %sw.bb1 - i32 94, label %sw.bb1 - i32 219, label %sw.bb1 - i32 88, label %sw.bb1 - i32 168, label %sw.bb1 - i32 295, label %sw.bb1 - i32 294, label %sw.bb1 - i32 296, label %sw.bb1 - 
i32 67, label %sw.bb1 - i32 293, label %sw.bb1 - i32 382, label %sw.bb1 - i32 335, label %sw.bb1 - i32 393, label %sw.bb1 - i32 415, label %sw.bb1 - i32 400, label %sw.bb1 - i32 383, label %sw.bb1 - i32 421, label %sw.bb1 - i32 422, label %sw.bb1 - i32 302, label %sw.bb1 - i32 303, label %sw.bb1 - i32 304, label %sw.bb1 - i32 420, label %sw.bb1 - i32 401, label %sw.epilog24 - i32 53, label %sw.bb12 - i32 44, label %sw.bb12 - ] - -sw.bb: ; preds = %while.body - unreachable - -sw.bb1: ; preds = %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body, %while.body - br i1 false, label %land.lhs.true, label %sw.epilog24 - -land.lhs.true: ; preds = %sw.bb1 - br label %sw.epilog24 - -sw.bb12: ; preds = %while.body, %while.body - switch i32 undef, label %sw.default [ - i32 47, label %sw.bb13 - i32 8, label %sw.bb13 - ] - -sw.bb13: ; preds = %sw.bb12, %sw.bb12 - unreachable - -sw.default: ; preds = %sw.bb12 - unreachable - -sw.default23: ; preds = %while.body - unreachable - -sw.epilog24: ; preds = %land.lhs.true, %sw.bb1, %while.body - %Previous.3 = phi i32 [ undef, %land.lhs.true ], [ undef, %sw.bb1 ], [ 401, %while.body ] - unreachable -} diff --git a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg6-dead-self-loop.ll b/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg6-dead-self-loop.ll deleted file mode 100644 index f9705a6948b21..0000000000000 --- a/llvm/test/Examples/IRTransforms/SimplifyCFG/tut-simplify-cfg6-dead-self-loop.ll +++ /dev/null @@ -1,25 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v1 -S < %s | FileCheck %s -; RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v2 -S < %s | FileCheck %s -; 
RUN: opt -tut-simplifycfg -tut-simplifycfg-version=v3 -S < %s | FileCheck %s - -define i32 @simp1() { -; CHECK-LABEL: @simp1( -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i32 1 -; CHECK: bb.1: -; CHECK-NEXT: br label [[BB_1:%.*]] -; CHECK: bb.2: -; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, [[BB_2:%.*]] ] -; CHECK-NEXT: br label [[BB_2]] -; -entry: - ret i32 1 - -bb.1: - br label %bb.1 - -bb.2: - %p = phi i32 [ 0, %bb.2] - br label %bb.2 -} diff --git a/llvm/tools/opt/CMakeLists.txt b/llvm/tools/opt/CMakeLists.txt index 90730e324c7d4..4ea9baf447a99 100644 --- a/llvm/tools/opt/CMakeLists.txt +++ b/llvm/tools/opt/CMakeLists.txt @@ -42,7 +42,3 @@ export_executable_symbols(opt) if(WITH_POLLY AND LINK_POLLY_INTO_TOOLS) target_link_libraries(opt PRIVATE Polly) endif(WITH_POLLY AND LINK_POLLY_INTO_TOOLS) - -if(LLVM_BUILD_EXAMPLES) - target_link_libraries(opt PRIVATE ExampleIRTransforms) -endif(LLVM_BUILD_EXAMPLES) diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index 092932237fd67..07eedfc659fef 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -482,10 +482,6 @@ static TargetMachine* GetTargetMachine(Triple TheTriple, StringRef CPUStr, getCodeModel(), GetCodeGenOptLevel()); } -#ifdef BUILD_EXAMPLES -void initializeExampleIRTransforms(llvm::PassRegistry &Registry); -#endif - #ifdef LINK_POLLY_INTO_TOOLS namespace polly { void initializePollyPasses(llvm::PassRegistry &Registry); @@ -567,10 +563,6 @@ int main(int argc, char **argv) { initializeWriteBitcodePassPass(Registry); initializeHardwareLoopsPass(Registry); -#ifdef BUILD_EXAMPLES - initializeExampleIRTransforms(Registry); -#endif - #ifdef LINK_POLLY_INTO_TOOLS polly::initializePollyPasses(Registry); #endif From 259ca0418ee962a69adc4057bce5d596bd3d42d1 Mon Sep 17 00:00:00 2001 From: Daniil Suchkov Date: Tue, 19 Nov 2019 14:16:39 +0700 Subject: [PATCH 254/591] [SCEV] Make SCEV verification available from command line with new PM New pass manager doesn't use verifyAnalysis, so currently there is no 
way to call SCEV verification from command line when new PM is used. This patch adds a pass that allows you to do that. Reviewers: reames, fhahn, sanjoy.google, nikic Reviewed By: fhahn Subscribers: hiraditya, javed.absar, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70423 --- llvm/include/llvm/Analysis/ScalarEvolution.h | 7 +++++++ llvm/lib/Analysis/ScalarEvolution.cpp | 6 ++++++ llvm/lib/Passes/PassRegistry.def | 1 + 3 files changed, 14 insertions(+) diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 57f84c1d0ebf0..5286f6a220ec8 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1922,6 +1922,13 @@ class ScalarEvolutionAnalysis ScalarEvolution run(Function &F, FunctionAnalysisManager &AM); }; +/// Verifier pass for the \c ScalarEvolutionAnalysis results. +class ScalarEvolutionVerifierPass + : public PassInfoMixin { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + /// Printer pass for the \c ScalarEvolutionAnalysis results. 
class ScalarEvolutionPrinterPass : public PassInfoMixin { diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 66c43cb451118..483159bef987f 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -12040,6 +12040,12 @@ ScalarEvolution ScalarEvolutionAnalysis::run(Function &F, AM.getResult(F)); } +PreservedAnalyses +ScalarEvolutionVerifierPass::run(Function &F, FunctionAnalysisManager &AM) { + AM.getResult(F).verify(); + return PreservedAnalyses::all(); +} + PreservedAnalyses ScalarEvolutionPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { AM.getResult(F).print(OS); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index d988506b5e980..8b583bde5909c 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -240,6 +240,7 @@ FUNCTION_PASS("verify", LoopVerifierPass()) FUNCTION_PASS("verify", MemorySSAVerifierPass()) FUNCTION_PASS("verify", RegionInfoVerifierPass()) FUNCTION_PASS("verify", SafepointIRVerifierPass()) +FUNCTION_PASS("verify", ScalarEvolutionVerifierPass()) FUNCTION_PASS("view-cfg", CFGViewerPass()) FUNCTION_PASS("view-cfg-only", CFGOnlyViewerPass()) FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass()) From cfbbdc83b41b119d78945c31eb8c3edd4bc11287 Mon Sep 17 00:00:00 2001 From: Austin Kerbow Date: Wed, 27 Nov 2019 15:07:56 -0800 Subject: [PATCH 255/591] AMDGPU/GlobalISel: Add AGPR bank and RegBankSelect mfma intrinsics Differential Revision: https://reviews.llvm.org/D70871 --- .../AMDGPU/AMDGPUGenRegisterBankInfo.def | 64 +- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 69 +- .../Target/AMDGPU/AMDGPURegisterBankInfo.h | 6 + llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td | 4 + llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 5 + .../GlobalISel/regbankselect-amdgcn.mfma.mir | 943 ++++++++++++++++++ 6 files changed, 1077 insertions(+), 14 deletions(-) create mode 100644 
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def b/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def index 85d1ad3491573..ae87cf08275f0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def @@ -32,7 +32,13 @@ enum PartialMappingIdx { PM_VGPR512 = 22, PM_VGPR1024 = 23, PM_SGPR96 = 24, - PM_VGPR96 = 25 + PM_VGPR96 = 25, + PM_AGPR96 = 26, + PM_AGPR32 = 32, + PM_AGPR64 = 33, + PM_AGPR128 = 34, + PM_AGPR512 = 36, + PM_AGPR1024 = 37 }; const RegisterBankInfo::PartialMapping PartMappings[] { @@ -58,7 +64,14 @@ const RegisterBankInfo::PartialMapping PartMappings[] { {0, 512, VGPRRegBank}, {0, 1024, VGPRRegBank}, {0, 96, SGPRRegBank}, - {0, 96, VGPRRegBank} + {0, 96, VGPRRegBank}, + {0, 96, AGPRRegBank}, + + {0, 32, AGPRRegBank}, // AGPR begin + {0, 64, AGPRRegBank}, + {0, 128, AGPRRegBank}, + {0, 512, AGPRRegBank}, + {0, 1024, AGPRRegBank} }; const RegisterBankInfo::ValueMapping ValMappings[] { @@ -94,7 +107,21 @@ const RegisterBankInfo::ValueMapping ValMappings[] { {&PartMappings[16], 1}, // 512 {&PartMappings[17], 1}, // 1024 {&PartMappings[18], 1}, - {&PartMappings[19], 1} + {&PartMappings[19], 1}, + {&PartMappings[20], 1}, + + // AGPRs + {nullptr, 0}, + {nullptr, 0}, + {nullptr, 0}, + {nullptr, 0}, + {nullptr, 0}, + {&PartMappings[21], 1}, // 32 + {&PartMappings[22], 1}, // 64 + {&PartMappings[23], 1}, // 128 + {nullptr, 0}, + {&PartMappings[24], 1}, // 512 + {&PartMappings[25], 1} // 1024 }; const RegisterBankInfo::PartialMapping SGPROnly64BreakDown[] { @@ -122,7 +149,8 @@ const RegisterBankInfo::ValueMapping ValMappingsSGPR64OnlyVGPR32[] { enum ValueMappingIdx { SCCStartIdx = 0, SGPRStartIdx = 2, - VGPRStartIdx = 13 + VGPRStartIdx = 13, + AGPRStartIdx = 27 }; const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID, @@ -139,12 +167,32 @@ const RegisterBankInfo::ValueMapping *getValueMapping(unsigned 
BankID, Idx = BankID == AMDGPU::SGPRRegBankID ? PM_SGPR1 : PM_VGPR1; break; case 96: - assert(BankID != AMDGPU::VCCRegBankID); - Idx = BankID == AMDGPU::SGPRRegBankID ? PM_SGPR96 : PM_VGPR96; + switch (BankID) { + case AMDGPU::VGPRRegBankID: + Idx = PM_VGPR96; + break; + case AMDGPU::SGPRRegBankID: + Idx = PM_SGPR96; + break; + case AMDGPU::AGPRRegBankID: + Idx = PM_AGPR96; + break; + default: llvm_unreachable("Invalid register bank"); + } break; default: - assert(BankID != AMDGPU::VCCRegBankID); - Idx = BankID == AMDGPU::VGPRRegBankID ? VGPRStartIdx : SGPRStartIdx; + switch (BankID) { + case AMDGPU::VGPRRegBankID: + Idx = VGPRStartIdx; + break; + case AMDGPU::SGPRRegBankID: + Idx = SGPRStartIdx; + break; + case AMDGPU::AGPRRegBankID: + Idx = AGPRStartIdx; + break; + default: llvm_unreachable("Invalid register bank"); + } Idx += Log2_32_Ceil(Size); break; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 8dae8b6c932ef..a51d3d74c899f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -106,6 +106,14 @@ AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const GCNSubtarget &ST) (void)RBVGPR; assert(&RBVGPR == &AMDGPU::VGPRRegBank); + const RegisterBank &RBAGPR = getRegBank(AMDGPU::AGPRRegBankID); + (void)RBAGPR; + assert(&RBAGPR == &AMDGPU::AGPRRegBank); +} + +static bool isVectorRegisterBank(const RegisterBank &Bank) { + unsigned BankID = Bank.getID(); + return BankID == AMDGPU::VGPRRegBankID || BankID == AMDGPU::AGPRRegBankID; } unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst, @@ -113,7 +121,7 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst, unsigned Size) const { // TODO: Should there be a UniformVGPRRegBank which can use readfirstlane? 
if (Dst.getID() == AMDGPU::SGPRRegBankID && - Src.getID() == AMDGPU::VGPRRegBankID) { + isVectorRegisterBank(Src)) { return std::numeric_limits::max(); } @@ -127,8 +135,8 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst, if (Size == 1 && (Dst.getID() == AMDGPU::SCCRegBankID || Dst.getID() == AMDGPU::SGPRRegBankID) && - (Src.getID() == AMDGPU::SGPRRegBankID || - Src.getID() == AMDGPU::VGPRRegBankID || + (isVectorRegisterBank(Src) || + Src.getID() == AMDGPU::SGPRRegBankID || Src.getID() == AMDGPU::VCCRegBankID)) return std::numeric_limits::max(); @@ -136,6 +144,11 @@ unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst, Src.getID() == AMDGPU::VCCRegBankID) return std::numeric_limits::max(); + // There is no direct copy between AGPRs. + if (Dst.getID() == AMDGPU::AGPRRegBankID && + Src.getID() == AMDGPU::AGPRRegBankID) + return 4; + return RegisterBankInfo::copyCost(Dst, Src, Size); } @@ -169,7 +182,12 @@ const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass( if (&RC == &AMDGPU::SReg_1RegClass) return AMDGPU::VCCRegBank; - return TRI->isSGPRClass(&RC) ? AMDGPU::SGPRRegBank : AMDGPU::VGPRRegBank; + if (TRI->isSGPRClass(&RC)) + return AMDGPU::SGPRRegBank; + if (TRI->isAGPRClass(&RC)) + return AMDGPU::AGPRRegBank; + + return AMDGPU::VGPRRegBank; } template @@ -1908,7 +1926,7 @@ bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const { continue; Register Reg = MI.getOperand(i).getReg(); if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) { - if (Bank->getID() == AMDGPU::VGPRRegBankID) + if (isVectorRegisterBank(*Bank)) return false; assert(Bank->getID() == AMDGPU::SGPRRegBankID || @@ -2072,7 +2090,6 @@ AMDGPURegisterBankInfo::getRegBankID(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, unsigned Default) const { - const RegisterBank *Bank = getRegBank(Reg, MRI, TRI); return Bank ? 
Bank->getID() : Default; } @@ -2102,6 +2119,14 @@ AMDGPURegisterBankInfo::getVGPROpMapping(Register Reg, return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); } +const RegisterBankInfo::ValueMapping * +AMDGPURegisterBankInfo::getAGPROpMapping(Register Reg, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const { + unsigned Size = getSizeInBits(Reg, MRI, TRI); + return AMDGPU::getValueMapping(AMDGPU::AGPRRegBankID, Size); +} + /// /// This function must return a legal mapping, because /// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called @@ -2725,6 +2750,38 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); break; } + case Intrinsic::amdgcn_mfma_f32_4x4x1f32: + case Intrinsic::amdgcn_mfma_f32_4x4x4f16: + case Intrinsic::amdgcn_mfma_i32_4x4x4i8: + case Intrinsic::amdgcn_mfma_f32_4x4x2bf16: + case Intrinsic::amdgcn_mfma_f32_16x16x1f32: + case Intrinsic::amdgcn_mfma_f32_16x16x4f32: + case Intrinsic::amdgcn_mfma_f32_16x16x4f16: + case Intrinsic::amdgcn_mfma_f32_16x16x16f16: + case Intrinsic::amdgcn_mfma_i32_16x16x4i8: + case Intrinsic::amdgcn_mfma_i32_16x16x16i8: + case Intrinsic::amdgcn_mfma_f32_16x16x2bf16: + case Intrinsic::amdgcn_mfma_f32_16x16x8bf16: + case Intrinsic::amdgcn_mfma_f32_32x32x1f32: + case Intrinsic::amdgcn_mfma_f32_32x32x2f32: + case Intrinsic::amdgcn_mfma_f32_32x32x4f16: + case Intrinsic::amdgcn_mfma_f32_32x32x8f16: + case Intrinsic::amdgcn_mfma_i32_32x32x4i8: + case Intrinsic::amdgcn_mfma_i32_32x32x8i8: + case Intrinsic::amdgcn_mfma_f32_32x32x2bf16: + case Intrinsic::amdgcn_mfma_f32_32x32x4bf16: { + // Default for MAI intrinsics. + // srcC can also be an immediate which can be folded later. + // FIXME: Should we eventually add an alternative mapping with AGPR src + // for srcA/srcB? 
+ // + // vdst, srcA, srcB, srcC + OpdsMapping[0] = getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); + OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); + OpdsMapping[4] = getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + break; + } } break; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h index a14b74961118a..9549e444ade54 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h @@ -103,6 +103,11 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const; + // Return a value mapping for an operand that is required to be a AGPR. + const ValueMapping *getAGPROpMapping(Register Reg, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const; + /// Split 64-bit value \p Reg into two 32-bit halves and populate them into \p /// Regs. This appropriately sets the regbank of the new registers. void split64BitValueForMapping(MachineIRBuilder &B, @@ -131,6 +136,7 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { const MachineInstr &MI, const MachineRegisterInfo &MRI) const; bool isSALUMapping(const MachineInstr &MI) const; + const InstructionMapping &getDefaultMappingSOP(const MachineInstr &MI) const; const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const; const InstructionMapping &getDefaultMappingAllVGPR( diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td index 00f53b1575770..ab3b176ac2147 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td @@ -18,3 +18,7 @@ def SCCRegBank : RegisterBank <"SCC", [SReg_32, SCC_CLASS]>; // It is helpful to distinguish conditions from ordinary SGPRs. 
def VCCRegBank : RegisterBank <"VCC", [SReg_1]>; + +def AGPRRegBank : RegisterBank <"AGPR", + [AGPR_32, AReg_64, AReg_128, AReg_512, AReg_1024] +>; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index ac3dea1a1a281..ac8c56fa3a038 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -144,6 +144,11 @@ class SIRegisterInfo final : public AMDGPURegisterInfo { return isSGPRClass(RC); } + /// \returns true if this class contains only AGPR registers + bool isAGPRClass(const TargetRegisterClass *RC) const { + return hasAGPRs(RC) && !hasVGPRs(RC); + } + /// \returns true if this class contains VGPR registers. bool hasVGPRs(const TargetRegisterClass *RC) const; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.mir new file mode 100644 index 0000000000000..54849b4c651d1 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.mfma.mir @@ -0,0 +1,943 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx908 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx908 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: mfma_f32_32x32x1f32_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + + ; CHECK-LABEL: name: mfma_f32_32x32x1f32_vva + ; CHECK: liveins: $vgpr0, $vgpr1, 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; CHECK: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<32 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + %3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 +... 
+ +--- +name: mfma_f32_32x32x1f32_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + + ; CHECK-LABEL: name: mfma_f32_32x32x1f32_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<32 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + %0:_(s32) = COPY $sgpr32 + %1:_(s32) = COPY $sgpr33 + %2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %3:_(<32 x s32>) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.mfma.f32.32x32x1f32), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 +... + +--- +name: mfma_f32_16x16x1f32_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + + ; CHECK-LABEL: name: mfma_f32_16x16x1f32_vva + ; CHECK: liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... 
+ +--- +name: mfma_f32_16x16x1f32_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + + ; CHECK-LABEL: name: mfma_f32_16x16x1f32_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(s32) = COPY $sgpr32 + %1:_(s32) = COPY $sgpr33 + %2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x1f32), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... 
+ +--- +name: mfma_f32_4x4x1f32_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3 + + ; CHECK-LABEL: name: mfma_f32_4x4x1f32_vva + ; CHECK: liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... + +--- +name: mfma_f32_4x4x1f32_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + + ; CHECK-LABEL: name: mfma_f32_4x4x1f32_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(s32) = COPY $sgpr32 + %1:_(s32) = COPY $sgpr33 + %2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x1f32), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 
+... + +--- +name: mfma_f32_32x32x2f32_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + + ; CHECK-LABEL: name: mfma_f32_32x32x2f32_vva + ; CHECK: liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... 
+ +--- +name: mfma_f32_32x32x2f32_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + + ; CHECK-LABEL: name: mfma_f32_32x32x2f32_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(s32) = COPY $sgpr32 + %1:_(s32) = COPY $sgpr33 + %2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2f32), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... 
+ +--- +name: mfma_f32_16x16x4f32_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3 + + ; CHECK-LABEL: name: mfma_f32_16x16x4f32_vva + ; CHECK: liveins: $vgpr0, $vgpr1, $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... + +--- +name: mfma_f32_16x16x4f32_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + + ; CHECK-LABEL: name: mfma_f32_16x16x4f32_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(s32) = COPY $sgpr32 + %1:_(s32) = COPY $sgpr33 + %2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f32), %0, %1, %2, 0, 0, 0 + 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... + +--- +name: mfma_f32_32x32x4f16_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + + ; CHECK-LABEL: name: mfma_f32_32x32x4f16_vva + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; CHECK: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<32 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + %3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), %0, %1, %2, 0, 0, 0 + 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 +... + +--- +name: mfma_f32_32x32x4f16_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + + ; CHECK-LABEL: name: mfma_f32_32x32x4f16_sss + ; CHECK: liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<32 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + %0:_(<4 x s16>) = COPY $sgpr32_sgpr33 + 
%1:_(<4 x s16>) = COPY $sgpr34_sgpr35 + %2:_(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4f16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 +... + +--- +name: mfma_f32_16x16x4f16_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + + ; CHECK-LABEL: name: mfma_f32_16x16x4f16_vva + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), %0, %1, %2, 0, 0, 0 + 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... + +--- +name: mfma_f32_16x16x4f16_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + + ; CHECK-LABEL: name: mfma_f32_16x16x4f16_sss + ; CHECK: liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(<4 x s16>) = COPY $sgpr32_sgpr33 + %1:_(<4 x s16>) = COPY $sgpr34_sgpr35 + %2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x4f16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... 
+ +--- +name: mfma_f32_4x4x4f16_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 + + ; CHECK-LABEL: name: mfma_f32_4x4x4f16_vva + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... + +--- +name: mfma_f32_4x4x4f16_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3 + + ; CHECK-LABEL: name: mfma_f32_4x4x4f16_sss + ; CHECK: liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(<4 x s16>) = COPY $sgpr32_sgpr33 + %1:_(<4 x s16>) = COPY 
$sgpr34_sgpr35 + %2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x4f16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... + +--- +name: mfma_f32_32x32x8f16_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + + ; CHECK-LABEL: name: mfma_f32_32x32x8f16_vva + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... 
+ +--- +name: mfma_f32_32x32x8f16_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + + ; CHECK-LABEL: name: mfma_f32_32x32x8f16_sss + ; CHECK: liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(<4 x s16>) = COPY $sgpr32_sgpr33 + %1:_(<4 x s16>) = COPY $sgpr34_sgpr35 + %2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x8f16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... 
+ +--- +name: mfma_f32_16x16x16f16_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 + + ; CHECK-LABEL: name: mfma_f32_16x16x16f16_vva + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[COPY]](<4 x s16>), [[COPY1]](<4 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... + +--- +name: mfma_f32_16x16x16f16_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3 + + ; CHECK-LABEL: name: mfma_f32_16x16x16f16_sss + ; CHECK: liveins: $sgpr32_sgpr33, $sgpr34_sgpr35, $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr32_sgpr33 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<4 x s16>) = COPY $sgpr34_sgpr35 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY]](<4 x s16>) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(<4 x s16>) = COPY [[COPY1]](<4 x s16>) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), [[COPY3]](<4 x s16>), [[COPY4]](<4 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(<4 x s16>) = COPY $sgpr32_sgpr33 + 
%1:_(<4 x s16>) = COPY $sgpr34_sgpr35 + %2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x16f16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... + +--- +name: mfma_i32_32x32x4i8_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + + ; CHECK-LABEL: name: mfma_i32_32x32x4i8_vva + ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; CHECK: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<32 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr2 + %2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + %3:_(<32 x s32>) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 +... + +--- +name: mfma_i32_32x32x4i8_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + + ; CHECK-LABEL: name: mfma_i32_32x32x4i8_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<32 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + %0:_(s32) = COPY $sgpr32 + %1:_(s32) = COPY $sgpr33 + %2:_(<32 x s32>) 
= COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x4i8), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 +... + +--- +name: mfma_i32_16x16x4i8_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + + ; CHECK-LABEL: name: mfma_i32_16x16x4i8_vva + ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr2 + %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... 
+ +--- +name: mfma_i32_16x16x4i8_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + + ; CHECK-LABEL: name: mfma_i32_16x16x4i8_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(s32) = COPY $sgpr32 + %1:_(s32) = COPY $sgpr33 + %2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x4i8), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... 
+ +--- +name: mfma_i32_4x4x4i8_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3 + + ; CHECK-LABEL: name: mfma_i32_4x4x4i8_vva + ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr2 + %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... + +--- +name: mfma_i32_4x4x4i8_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + + ; CHECK-LABEL: name: mfma_i32_4x4x4i8_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(s32) = COPY $sgpr32 + %1:_(s32) = COPY $sgpr33 + %2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.4x4x4i8), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... 
+ +--- +name: mfma_i32_32x32x8i8_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + + ; CHECK-LABEL: name: mfma_i32_32x32x8i8_vva + ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr2 + %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... 
+ +--- +name: mfma_i32_32x32x8i8_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + + ; CHECK-LABEL: name: mfma_i32_32x32x8i8_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(s32) = COPY $sgpr32 + %1:_(s32) = COPY $sgpr33 + %2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.32x32x8i8), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... 
+ +--- +name: mfma_i32_16x16x16i8_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3 + + ; CHECK-LABEL: name: mfma_i32_16x16x16i8_vva + ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr2 + %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... + +--- +name: mfma_i32_16x16x16i8_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + + ; CHECK-LABEL: name: mfma_i32_16x16x16i8_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(s32) = COPY $sgpr32 + %1:_(s32) = COPY $sgpr33 + %2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.i32.16x16x16i8), %0, %1, %2, 0, 0, 0 + 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... + +--- +name: mfma_f32_32x32x2bf16_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + + ; CHECK-LABEL: name: mfma_f32_32x32x2bf16_vva + ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; CHECK: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<32 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr2 + %2:_(<32 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + %3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), %0, %1, %2, 0, 0, 0 + 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 +... + +--- +name: mfma_f32_32x32x2bf16_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + + ; CHECK-LABEL: name: mfma_f32_32x32x2bf16_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<32 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<32 x s32>) = COPY [[COPY2]](<32 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<32 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY [[INT]](<32 x s32>) + %0:_(<2 x s16>) = COPY $sgpr32 + %1:_(<2 x s16>) = COPY $sgpr33 + %2:_(<32 x s32>) 
= COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %3:_(<32 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x2bf16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = COPY %3 +... + +--- +name: mfma_f32_16x16x2bf16_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + + ; CHECK-LABEL: name: mfma_f32_16x16x2bf16_vva + ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr2 + %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... 
+ +--- +name: mfma_f32_16x16x2bf16_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + + ; CHECK-LABEL: name: mfma_f32_16x16x2bf16_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(<2 x s16>) = COPY $sgpr32 + %1:_(<2 x s16>) = COPY $sgpr33 + %2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x2bf16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... 
+ +--- +name: mfma_f32_4x4x2bf16_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3 + + ; CHECK-LABEL: name: mfma_f32_4x4x2bf16_vva + ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr2 + %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... + +--- +name: mfma_f32_4x4x2bf16_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + + ; CHECK-LABEL: name: mfma_f32_4x4x2bf16_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(<2 x s16>) = COPY $sgpr32 + %1:_(<2 x s16>) = COPY $sgpr33 + %2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %3:_(<4 x s32>) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.mfma.f32.4x4x2bf16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... + +--- +name: mfma_f32_32x32x4bf16_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + + ; CHECK-LABEL: name: mfma_f32_32x32x4bf16_vva + ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr2 + %2:_(<16 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... 
+ +--- +name: mfma_f32_32x32x4bf16_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + + ; CHECK-LABEL: name: mfma_f32_32x32x4bf16_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<16 x s32>) = COPY [[COPY2]](<16 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<16 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[INT]](<16 x s32>) + %0:_(<2 x s16>) = COPY $sgpr32 + %1:_(<2 x s16>) = COPY $sgpr33 + %2:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %3:_(<16 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.32x32x4bf16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... 
+ +--- +name: mfma_f32_16x16x8bf16_vva +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3 + + ; CHECK-LABEL: name: mfma_f32_16x16x8bf16_vva + ; CHECK: liveins: $vgpr0, $vgpr2, $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:agpr(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr2 + %2:_(<4 x s32>) = COPY $agpr0_agpr1_agpr2_agpr3 + %3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... + +--- +name: mfma_f32_16x16x8bf16_sss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + + ; CHECK-LABEL: name: mfma_f32_16x16x8bf16_sss + ; CHECK: liveins: $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr32 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr33 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) + ; CHECK: [[COPY4:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) + ; CHECK: [[COPY5:%[0-9]+]]:agpr(<4 x s32>) = COPY [[COPY2]](<4 x s32>) + ; CHECK: [[INT:%[0-9]+]]:agpr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<4 x s32>), 0, 0, 0 + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INT]](<4 x s32>) + %0:_(<2 x s16>) = COPY $sgpr32 + %1:_(<2 x s16>) = COPY $sgpr33 + %2:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + %3:_(<4 x s32>) 
= G_INTRINSIC intrinsic(@llvm.amdgcn.mfma.f32.16x16x8bf16), %0, %1, %2, 0, 0, 0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... From 269c1c703d5f70421ea64ee5c919fea06156237e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 21 Nov 2019 11:24:44 +0530 Subject: [PATCH 256/591] Fix broken comment phrasing and indentation --- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 488bd270ac900..5796c6e6a112c 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1140,10 +1140,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, MIB.addImm(0); // clamp bit } } else { - // We have to produce a carry out, and we there isn't a free SGPR - // pair for it. We can keep the whole computation on the SALU to - // avoid clobbering an additional register at the cost of an extra - // mov. + // We have to produce a carry out, and there isn't a free SGPR pair + // for it. We can keep the whole computation on the SALU to avoid + // clobbering an additional register at the cost of an extra mov. // We may have 1 free scratch SGPR even though a carry out is // unavailable. Only one additional mov is needed. 
@@ -1165,9 +1164,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScaledReg) .addReg(ScaledReg, RegState::Kill) .addImm(Offset); - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg) - .addReg(DiffReg, RegState::Kill) - .addImm(ST.getWavefrontSizeLog2()); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg) + .addReg(DiffReg, RegState::Kill) + .addImm(ST.getWavefrontSizeLog2()); } } } From 497a754adeca67196c12a936d09c12d6803f99de Mon Sep 17 00:00:00 2001 From: Adam Balogh Date: Fri, 29 Nov 2019 13:30:26 +0100 Subject: [PATCH 257/591] [Clang-Tidy] Quick fix for bug in bugprone-macro-parentheses 43804 Applying parentheses for statement leads to compilation error. Bug [[ 43804 | https://bugs.llvm.org/show_bug.cgi?id=43804 ]] is a compilation error suggested by a wrong fix of this checker. This patch is a quick fix for this issue. Differential Revision: https://reviews.llvm.org/D70850 --- clang-tools-extra/clang-tidy/bugprone/MacroParenthesesCheck.cpp | 2 +- .../test/clang-tidy/checkers/bugprone-macro-parentheses.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clang-tidy/bugprone/MacroParenthesesCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/MacroParenthesesCheck.cpp index 7ca5c1e3454b1..8d4366b51a3ec 100644 --- a/clang-tools-extra/clang-tidy/bugprone/MacroParenthesesCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/MacroParenthesesCheck.cpp @@ -54,7 +54,7 @@ static bool isSurroundedRight(const Token &T) { /// Is given TokenKind a keyword? static bool isKeyword(const Token &T) { // FIXME: better matching of keywords to avoid false positives. - return T.isOneOf(tok::kw_case, tok::kw_const, tok::kw_struct); + return T.isOneOf(tok::kw_if, tok::kw_case, tok::kw_const, tok::kw_struct); } /// Warning is written when one of these operators are not within parentheses. 
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-macro-parentheses.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-macro-parentheses.cpp index 2cc45e83b2037..8d128352e7894 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-macro-parentheses.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-macro-parentheses.cpp @@ -43,6 +43,7 @@ #define GOOD30(args...) std::cout << args; #define GOOD31(X) A*X=2 #define GOOD32(X) std::vector +#define GOOD33(x) if (!a__##x) a_##x = &f(#x) // These are allowed for now.. #define MAYBE1 *12.34 From bd23859f390aa81ddb1bf0b16684cce50ad9d66d Mon Sep 17 00:00:00 2001 From: Anton Afanasyev Date: Mon, 2 Dec 2019 10:07:55 +0300 Subject: [PATCH 258/591] [NFC] Precommit test showing SROA loses `!tbaa.struct` metadata This issue impacts llvm.org/pr42022 --- llvm/test/Transforms/SROA/tbaa-struct.ll | 32 ++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 llvm/test/Transforms/SROA/tbaa-struct.ll diff --git a/llvm/test/Transforms/SROA/tbaa-struct.ll b/llvm/test/Transforms/SROA/tbaa-struct.ll new file mode 100644 index 0000000000000..d59e67a4cf34a --- /dev/null +++ b/llvm/test/Transforms/SROA/tbaa-struct.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -sroa %s | FileCheck %s + +; SROA should keep `!tbaa.struct` metadata + +%vector = type { float, float } +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* writeonly, i8* readonly, i64, i1 immarg) +declare <2 x float> @foo(%vector* %0) + +define void @bar(%vector* %y2) { +; CHECK-LABEL: @bar( +; CHECK-NEXT: [[X14:%.*]] = call <2 x float> @foo(%vector* [[Y2:%.*]]) +; CHECK-NEXT: [[X7_SROA_0_0_X18_SROA_CAST:%.*]] = bitcast %vector* [[Y2]] to <2 x float>* +; CHECK-NEXT: store <2 x float> [[X14]], <2 x float>* [[X7_SROA_0_0_X18_SROA_CAST]], align 4 +; CHECK-NEXT: ret void +; + %x7 = alloca %vector + %x14 = call <2 x float> @foo(%vector* %y2) + %x15 = bitcast %vector* %x7 to <2 
x float>* + store <2 x float> %x14, <2 x float>* %x15 + %x19 = bitcast %vector* %x7 to i8* + %x18 = bitcast %vector* %y2 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %x18, i8* align 4 %x19, i64 8, i1 false), !tbaa.struct !10 + ret void +} + +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C++ TBAA"} +!7 = !{!"vector", !8, i64 0, !8, i64 4} +!8 = !{!"float", !4, i64 0} +!10 = !{i64 0, i64 4, !11, i64 4, i64 4, !11} +!11 = !{!8, !8, i64 0} From c653a52c85ff913bcbef007082763dbc754d6933 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Thu, 28 Nov 2019 12:19:50 +0300 Subject: [PATCH 259/591] [llvm-readobj/llvm-readelf] - Reimplement dumping of the SHT_GNU_verneed section. This is similar to D70495, but for SHT_GNU_verneed section. It solves the same problems: different implementations, lack of error reporting and no test coverage. DIfferential revision: https://reviews.llvm.org/D70826 --- .../llvm-readobj/elf-verneed-invalid.test | 236 +++++++++++++++- llvm/tools/llvm-readobj/ELFDumper.cpp | 252 ++++++++++++------ 2 files changed, 390 insertions(+), 98 deletions(-) diff --git a/llvm/test/tools/llvm-readobj/elf-verneed-invalid.test b/llvm/test/tools/llvm-readobj/elf-verneed-invalid.test index 971cada0195a5..53f8562989d5b 100644 --- a/llvm/test/tools/llvm-readobj/elf-verneed-invalid.test +++ b/llvm/test/tools/llvm-readobj/elf-verneed-invalid.test @@ -81,16 +81,18 @@ DynamicSymbols: ## this situation properly. 
# RUN: yaml2obj --docnum=2 %s -o %t2 -# RUN: llvm-readelf -V %t2 | FileCheck %s --check-prefix=GNU-NOLINK -# RUN: llvm-readobj -V %t2 | FileCheck %s --check-prefix=LLVM-NOLINK +# RUN: llvm-readelf -V %t2 2>&1 | FileCheck %s -DFILE=%t2 --check-prefix=GNU-NOLINK +# RUN: llvm-readobj -V %t2 2>&1 | FileCheck %s -DFILE=%t2 --check-prefix=LLVM-NOLINK # GNU-NOLINK: Version symbols section '.gnu.version' contains 2 entries: # GNU-NOLINK-NEXT: Addr: 0000000000000000 Offset: 0x000040 Link: 5 (.dynsym) # GNU-NOLINK-NEXT: 000: 0 (*local*) 2 (bar) # GNU-NOLINK: Version needs section '.gnu.version_r' contains 1 entries: # GNU-NOLINK-NEXT: Addr: 0000000000000000 Offset: 0x000044 Link: 0 () -# GNU-NOLINK-NEXT: 0x0000: Version: 1 File: Cnt: 1 -# GNU-NOLINK-NEXT: 0x0010: Name: Flags: none Version: 2 +# GNU-NOLINK-EMPTY: +# GNU-NOLINK-NEXT: warning: '[[FILE]]': invalid string table linked to SHT_GNU_verneed section with index 2: invalid sh_type for string table section [index 0]: expected SHT_STRTAB, but got SHT_NULL +# GNU-NOLINK-NEXT: 0x0000: Version: 1 File: Cnt: 1 +# GNU-NOLINK-NEXT: 0x0010: Name: Flags: none Version: 2 # LLVM-NOLINK: VersionSymbols [ # LLVM-NOLINK: Symbol { @@ -104,17 +106,19 @@ DynamicSymbols: # LLVM-NOLINK-NEXT: ] # LLVM-NOLINK: VersionRequirements [ +# LLVM-NOLINK-EMPTY: +# LLVM-NOLINK-NEXT: warning: '[[FILE]]': invalid string table linked to SHT_GNU_verneed section with index 2: invalid sh_type for string table section [index 0]: expected SHT_STRTAB, but got SHT_NULL # LLVM-NOLINK-NEXT: Dependency { # LLVM-NOLINK-NEXT: Version: 1 # LLVM-NOLINK-NEXT: Count: 1 -# LLVM-NOLINK-NEXT: FileName: +# LLVM-NOLINK-NEXT: FileName: # LLVM-NOLINK-NEXT: Entries [ # LLVM-NOLINK-NEXT: Entry { # LLVM-NOLINK-NEXT: Hash: 0 # LLVM-NOLINK-NEXT: Flags [ (0x0) # LLVM-NOLINK-NEXT: ] # LLVM-NOLINK-NEXT: Index: 2 -# LLVM-NOLINK-NEXT: Name: +# LLVM-NOLINK-NEXT: Name: # LLVM-NOLINK-NEXT: } # LLVM-NOLINK-NEXT: ] # LLVM-NOLINK-NEXT: } @@ -212,14 +216,14 @@ DynamicSymbols: # 
LLVM-OFFSET-EQ-NEXT: Dependency { # LLVM-OFFSET-EQ-NEXT: Version: 1 # LLVM-OFFSET-EQ-NEXT: Count: 1 -# LLVM-OFFSET-EQ-NEXT: FileName: +# LLVM-OFFSET-EQ-NEXT: FileName: # LLVM-OFFSET-EQ-NEXT: Entries [ # LLVM-OFFSET-EQ-NEXT: Entry { # LLVM-OFFSET-EQ-NEXT: Hash: 0 # LLVM-OFFSET-EQ-NEXT: Flags [ (0x0) # LLVM-OFFSET-EQ-NEXT: ] # LLVM-OFFSET-EQ-NEXT: Index: 0 -# LLVM-OFFSET-EQ-NEXT: Name: +# LLVM-OFFSET-EQ-NEXT: Name: # LLVM-OFFSET-EQ-NEXT: } # LLVM-OFFSET-EQ-NEXT: ] # LLVM-OFFSET-EQ-NEXT: } @@ -227,8 +231,8 @@ DynamicSymbols: # GNU-OFFSET-EQ: Version needs section '.gnu.version_r' contains 1 entries: # GNU-OFFSET-EQ-NEXT: Addr: 0000000000000000 Offset: 0x000044 Link: 1 (.mystrtab) -# GNU-OFFSET-EQ-NEXT: 0x0000: Version: 1 File: Cnt: 1 -# GNU-OFFSET-EQ-NEXT: 0x0010: Name: Flags: none Version: 0 +# GNU-OFFSET-EQ-NEXT: 0x0000: Version: 1 File: Cnt: 1 +# GNU-OFFSET-EQ-NEXT: 0x0010: Name: Flags: none Version: 0 --- !ELF FileHeader: @@ -268,14 +272,14 @@ DynamicSymbols: # LLVM-OFFSET-GR-NEXT: Dependency { # LLVM-OFFSET-GR-NEXT: Version: 1 # LLVM-OFFSET-GR-NEXT: Count: 1 -# LLVM-OFFSET-GR-NEXT: FileName: +# LLVM-OFFSET-GR-NEXT: FileName: # LLVM-OFFSET-GR-NEXT: Entries [ # LLVM-OFFSET-GR-NEXT: Entry { # LLVM-OFFSET-GR-NEXT: Hash: 0 # LLVM-OFFSET-GR-NEXT: Flags [ (0x0) # LLVM-OFFSET-GR-NEXT: ] # LLVM-OFFSET-GR-NEXT: Index: 0 -# LLVM-OFFSET-GR-NEXT: Name: +# LLVM-OFFSET-GR-NEXT: Name: # LLVM-OFFSET-GR-NEXT: } # LLVM-OFFSET-GR-NEXT: ] # LLVM-OFFSET-GR-NEXT: } @@ -283,8 +287,8 @@ DynamicSymbols: # GNU-OFFSET-GR: Version needs section '.gnu.version_r' contains 1 entries: # GNU-OFFSET-GR-NEXT: Addr: 0000000000000000 Offset: 0x000040 Link: 1 (.mystrtab) -# GNU-OFFSET-GR-NEXT: 0x0000: Version: 1 File: Cnt: 1 -# GNU-OFFSET-GR-NEXT: 0x0010: Name: Flags: none Version: 0 +# GNU-OFFSET-GR-NEXT: 0x0000: Version: 1 File: Cnt: 1 +# GNU-OFFSET-GR-NEXT: 0x0010: Name: Flags: none Version: 0 --- !ELF FileHeader: @@ -312,3 +316,207 @@ Sections: Other: 0 DynamicSymbols: - Name: foo + +## Check that 
we report a warning when sh_link references a non-existent section. + +# RUN: yaml2obj --docnum=6 %s -o %t6 +# RUN: llvm-readobj --sections -V %t6 2>&1 | FileCheck %s -DFILE=%t6 --implicit-check-not="warning:" --check-prefix=INVALID-LINK-LLVM +# RUN: llvm-readelf --sections -V %t6 2>&1 | FileCheck %s -DFILE=%t6 --implicit-check-not="warning:" --check-prefix=INVALID-LINK-GNU + +# INVALID-LINK-LLVM: VersionRequirements [ +# INVALID-LINK-LLVM-EMPTY: +# INVALID-LINK-LLVM-NEXT: warning: '[[FILE]]': invalid section linked to SHT_GNU_verneed section with index 1: invalid section index: 255 +# INVALID-LINK-LLVM-NEXT: Dependency { +# INVALID-LINK-LLVM-NEXT: Version: 1 +# INVALID-LINK-LLVM-NEXT: Count: 1 +# INVALID-LINK-LLVM-NEXT: FileName: +# INVALID-LINK-LLVM-NEXT: Entries [ +# INVALID-LINK-LLVM-NEXT: Entry { +# INVALID-LINK-LLVM-NEXT: Hash: 0 +# INVALID-LINK-LLVM-NEXT: Flags [ (0x0) +# INVALID-LINK-LLVM-NEXT: ] +# INVALID-LINK-LLVM-NEXT: Index: 0 +# INVALID-LINK-LLVM-NEXT: Name: +# INVALID-LINK-LLVM-NEXT: } +# INVALID-LINK-LLVM-NEXT: ] +# INVALID-LINK-LLVM-NEXT: } +# INVALID-LINK-LLVM-NEXT: ] + +# INVALID-LINK-GNU: Version needs section '.gnu.version_r' contains 1 entries: +# INVALID-LINK-GNU-EMPTY: +# INVALID-LINK-GNU-NEXT: warning: '[[FILE]]': invalid section linked to SHT_GNU_verneed section with index 1: invalid section index: 255 +# INVALID-LINK-GNU-NEXT: Addr: 0000000000000000 Offset: 0x000040 Link: 255 () +# INVALID-LINK-GNU-NEXT: 0x0000: Version: 1 File: Cnt: 1 +# INVALID-LINK-GNU-NEXT: 0x0010: Name: Flags: none Version: 0 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_r + Type: SHT_GNU_verneed + Flags: [ SHF_ALLOC ] + Info: 1 + Link: 0xFF + Dependencies: + - Version: 1 + File: foo + Entries: + - Name: 'foo' + Hash: 0 + Flags: 0 + Other: 0 +DynamicSymbols: + - Name: foo + +## Check that we report a warning when we can't read the content of the SHT_GNU_verneed section. 
+ +# RUN: yaml2obj --docnum=7 %s -o %t7 +# RUN: llvm-readobj --sections -V %t7 2>&1 | FileCheck %s -DFILE=%t7 --check-prefix=INVALID-DATA +# RUN: llvm-readelf --sections -V %t7 2>&1 | FileCheck %s -DFILE=%t7 --check-prefix=INVALID-DATA + +# INVALID-DATA: warning: '[[FILE]]': cannot read content of SHT_GNU_verneed section with index 1: section [index 1] has a sh_offset (0xffffffff) + sh_size (0x0) that cannot be represented + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_r + Type: SHT_GNU_verneed + Flags: [ SHF_ALLOC ] + Info: 1 + Link: .dynstr + ShOffset: 0xFFFFFFFF +## Triggers creation of the .dynstr. +DynamicSymbols: + - Name: foo + +## Check that we report a warning when a SHT_GNU_verneed section contains a version dependency +## that goes past the end of the section. + +# RUN: yaml2obj --docnum=8 %s -o %t8 +# RUN: llvm-readobj --sections -V %t8 2>&1 | FileCheck %s -DFILE=%t8 --check-prefix=DEP-PAST-END +# RUN: llvm-readelf --sections -V %t8 2>&1 | FileCheck %s -DFILE=%t8 --check-prefix=DEP-PAST-END + +# DEP-PAST-END: warning: '[[FILE]]': invalid SHT_GNU_verneed section with index 1: version dependency 1 goes past the end of the section + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_r + Type: SHT_GNU_verneed + Flags: [ SHF_ALLOC ] + Info: 1 + Link: .dynstr + ShSize: 0x1 + Dependencies: + - Version: 1 + File: foo + Entries: + - Name: 'foo' + Hash: 0 + Flags: 0 + Other: 0 +DynamicSymbols: + - Name: foo + +## Check we report a warning when a version dependency is not correctly aligned in memory. 
+ +# RUN: yaml2obj --docnum=9 %s -o %t9 +# RUN: llvm-readobj --sections -V %t9 2>&1 | FileCheck %s -DFILE=%t9 --check-prefix=MISALIGNED-DEP +# RUN: llvm-readelf --sections -V %t9 2>&1 | FileCheck %s -DFILE=%t9 --check-prefix=MISALIGNED-DEP + +# MISALIGNED-DEP: warning: '[[FILE]]': invalid SHT_GNU_verneed section with index 1: found a misaligned version dependency entry at offset 0x0 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Type: Fill + Size: 0x1 + - Name: .gnu.version_r + Type: SHT_GNU_verneed + Flags: [ SHF_ALLOC ] + Info: 1 + Link: .dynstr + Dependencies: + - Version: 1 + File: foo + Entries: + - Name: 'foo' + Hash: 0 + Flags: 0 + Other: 0 +DynamicSymbols: + - Name: foo + +## Check that we report a warning when a SHT_GNU_verneed section contains a dependency definition +## that refers to an auxiliary entry that goes past the end of the section. + +# RUN: yaml2obj --docnum=10 %s -o %t10 +# RUN: llvm-readobj --sections -V %t10 2>&1 | FileCheck %s -DFILE=%t10 --check-prefix=AUX-PAST-END +# RUN: llvm-readelf --sections -V %t10 2>&1 | FileCheck %s -DFILE=%t10 --check-prefix=AUX-PAST-END + +# AUX-PAST-END: warning: '[[FILE]]': invalid SHT_GNU_verneed section with index 1: version dependency 1 refers to an auxiliary entry that goes past the end of the section + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_r + Type: SHT_GNU_verneed + Flags: [ SHF_ALLOC ] + Info: 1 + Link: .dynstr + ShSize: 21 + Dependencies: + - Version: 1 + File: foo + Entries: + - Name: 'foo' + Hash: 0 + Flags: 0 + Other: 0 +DynamicSymbols: + - Name: foo + +## Check we report a warning when an auxiliary entry is not correctly aligned in memory. 
+ +# RUN: yaml2obj %s --docnum=11 -o %t11 +# RUN: llvm-readobj -V %t11 2>&1 | FileCheck %s --check-prefix=MISALIGNED-AUX -DFILE=%t11 +# RUN: llvm-readelf -V %t11 2>&1 | FileCheck %s --check-prefix=MISALIGNED-AUX -DFILE=%t11 + +# MISALIGNED-AUX: warning: '[[FILE]]': invalid SHT_GNU_verneed section with index 1: found a misaligned auxiliary entry at offset 0x11 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_r + Type: SHT_GNU_verneed + Flags: [ SHF_ALLOC ] + Info: 1 + Link: .dynstr +## The byte offset to the auxiliary entry is 0x11, i.e. it is not correctly aligned in memory. + Content: "0100010001000000110000000000000000000000" +DynamicSymbols: + - Name: foo diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 51ea599f4be74..1031e0a181dbf 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -167,6 +167,23 @@ struct VerDef { std::string Name; std::vector AuxV; }; + +struct VernAux { + unsigned Hash; + unsigned Flags; + unsigned Other; + unsigned Offset; + std::string Name; +}; + +struct VerNeed { + unsigned Version; + unsigned Cnt; + unsigned Offset; + std::string File; + std::vector AuxV; +}; + } // namespace template class ELFDumper : public ObjDumper { @@ -345,25 +362,50 @@ template class ELFDumper : public ObjDumper { Expected> getVersionDefinitions(const Elf_Shdr *Sec) const; + Expected> + getVersionDependencies(const Elf_Shdr *Sec) const; }; +static StringRef getSecTypeName(unsigned Type) { + if (Type == ELF::SHT_GNU_versym) + return "SHT_GNU_versym"; + if (Type == ELF::SHT_GNU_verdef) + return "SHT_GNU_verdef"; + if (Type == ELF::SHT_GNU_verneed) + return "SHT_GNU_verneed"; + llvm_unreachable("unexpected section type"); +} + +template +static Expected getLinkAsStrtab(const ELFFile *Obj, + const typename ELFT::Shdr *Sec, + unsigned SecNdx) { + Expected StrTabSecOrErr = + 
Obj->getSection(Sec->sh_link); + if (!StrTabSecOrErr) + return createError("invalid section linked to " + + getSecTypeName(Sec->sh_type) + " section with index " + + Twine(SecNdx) + ": " + + toString(StrTabSecOrErr.takeError())); + + Expected StrTabOrErr = Obj->getStringTable(*StrTabSecOrErr); + if (!StrTabOrErr) + return createError("invalid string table linked to " + + getSecTypeName(Sec->sh_type) + " section with index " + + Twine(SecNdx) + ": " + + toString(StrTabOrErr.takeError())); + return *StrTabOrErr; +} + template Expected> ELFDumper::getVersionDefinitions(const Elf_Shdr *Sec) const { const ELFFile *Obj = ObjF->getELFFile(); unsigned SecNdx = Sec - &cantFail(Obj->sections()).front(); - Expected StrTabSecOrErr = Obj->getSection(Sec->sh_link); - if (!StrTabSecOrErr) - return createError( - "invalid section linked to SHT_GNU_verdef section with index " + - Twine(SecNdx) + ": " + toString(StrTabSecOrErr.takeError())); - - Expected StrTabOrErr = Obj->getStringTable(*StrTabSecOrErr); + Expected StrTabOrErr = getLinkAsStrtab(Obj, Sec, SecNdx); if (!StrTabOrErr) - return createError( - "invalid string table linked to SHT_GNU_verdef section with index " + - Twine(SecNdx) + ": " + toString(StrTabOrErr.takeError())); + return StrTabOrErr.takeError(); Expected> ContentsOrErr = Obj->getSectionContents(Sec); if (!ContentsOrErr) @@ -448,6 +490,90 @@ ELFDumper::getVersionDefinitions(const Elf_Shdr *Sec) const { return Ret; } +template +Expected> +ELFDumper::getVersionDependencies(const Elf_Shdr *Sec) const { + const ELFFile *Obj = ObjF->getELFFile(); + unsigned SecNdx = Sec - &cantFail(Obj->sections()).front(); + + StringRef StrTab; + Expected StrTabOrErr = getLinkAsStrtab(Obj, Sec, SecNdx); + if (!StrTabOrErr) + ELFDumperStyle->reportUniqueWarning(StrTabOrErr.takeError()); + else + StrTab = *StrTabOrErr; + + Expected> ContentsOrErr = Obj->getSectionContents(Sec); + if (!ContentsOrErr) + return createError( + "cannot read content of SHT_GNU_verneed section with index " + 
+ Twine(SecNdx) + ": " + toString(ContentsOrErr.takeError())); + + const uint8_t *Start = ContentsOrErr->data(); + const uint8_t *End = Start + ContentsOrErr->size(); + const uint8_t *VerneedBuf = Start; + + std::vector Ret; + for (unsigned I = 1; I <= /*VerneedNum=*/Sec->sh_info; ++I) { + if (VerneedBuf + sizeof(Elf_Verdef) > End) + return createError("invalid SHT_GNU_verneed section with index " + + Twine(SecNdx) + ": version dependency " + Twine(I) + + " goes past the end of the section"); + + if (uintptr_t(VerneedBuf) % sizeof(uint32_t) != 0) + return createError( + "invalid SHT_GNU_verneed section with index " + Twine(SecNdx) + + ": found a misaligned version dependency entry at offset 0x" + + Twine::utohexstr(VerneedBuf - Start)); + + const Elf_Verneed *Verneed = + reinterpret_cast(VerneedBuf); + + VerNeed &VN = *Ret.emplace(Ret.end()); + VN.Version = Verneed->vn_version; + VN.Cnt = Verneed->vn_cnt; + VN.Offset = VerneedBuf - Start; + + if (Verneed->vn_file < StrTab.size()) + VN.File = StrTab.drop_front(Verneed->vn_file); + else + VN.File = "vn_file) + ">"; + + const uint8_t *VernauxBuf = VerneedBuf + Verneed->vn_aux; + for (unsigned J = 0; J < Verneed->vn_cnt; ++J) { + if (uintptr_t(VernauxBuf) % sizeof(uint32_t) != 0) + return createError("invalid SHT_GNU_verneed section with index " + + Twine(SecNdx) + + ": found a misaligned auxiliary entry at offset 0x" + + Twine::utohexstr(VernauxBuf - Start)); + + if (VernauxBuf + sizeof(Elf_Vernaux) > End) + return createError( + "invalid SHT_GNU_verneed section with index " + Twine(SecNdx) + + ": version dependency " + Twine(I) + + " refers to an auxiliary entry that goes past the end " + "of the section"); + + const Elf_Vernaux *Vernaux = + reinterpret_cast(VernauxBuf); + + VernAux &Aux = *VN.AuxV.emplace(VN.AuxV.end()); + Aux.Hash = Vernaux->vna_hash; + Aux.Flags = Vernaux->vna_flags; + Aux.Other = Vernaux->vna_other; + Aux.Offset = VernauxBuf - Start; + if (StrTab.size() <= Vernaux->vna_name) + Aux.Name = ""; + 
else + Aux.Name = StrTab.drop_front(Vernaux->vna_name); + + VernauxBuf += Vernaux->vna_next; + } + VerneedBuf += Verneed->vn_next; + } + return Ret; +} + template void ELFDumper::printSymbolsHelper(bool IsDynamic) const { StringRef StrTable, SymtabName; @@ -3946,9 +4072,10 @@ void GNUStyle::printGNUVersionSectionProlog( SymTabName = unwrapOrError(this->FileName, Obj->getSectionName(*SymTabOrErr)); else - this->reportUniqueWarning(createError( - "invalid section linked to SHT_GNU_verdef section with index " + - Twine(SecNdx) + ": " + toString(SymTabOrErr.takeError()))); + this->reportUniqueWarning( + createError("invalid section linked to " + + getSecTypeName(Sec->sh_type) + " section with index " + + Twine(SecNdx) + ": " + toString(SymTabOrErr.takeError()))); OS << " Addr: " << format_hex_no_prefix(Sec->sh_addr, 16) << " Offset: " << format_hex(Sec->sh_offset, 8) @@ -4064,45 +4191,20 @@ void GNUStyle::printVersionDependencySection(const ELFFile *Obj, unsigned VerneedNum = Sec->sh_info; printGNUVersionSectionProlog(Obj, Sec, "Version needs", VerneedNum); - ArrayRef SecData = - unwrapOrError(this->FileName, Obj->getSectionContents(Sec)); - - const Elf_Shdr *StrTabSec = - unwrapOrError(this->FileName, Obj->getSection(Sec->sh_link)); - StringRef StringTable = { - reinterpret_cast(Obj->base() + StrTabSec->sh_offset), - (size_t)StrTabSec->sh_size}; - - const uint8_t *VerneedBuf = SecData.data(); - for (unsigned I = 0; I < VerneedNum; ++I) { - const Elf_Verneed *Verneed = - reinterpret_cast(VerneedBuf); - - StringRef File = StringTable.size() > Verneed->vn_file - ? 
StringTable.drop_front(Verneed->vn_file) - : ""; - - OS << format(" 0x%04x: Version: %u File: %s Cnt: %u\n", - reinterpret_cast(Verneed) - SecData.begin(), - (unsigned)Verneed->vn_version, File.data(), - (unsigned)Verneed->vn_cnt); - - const uint8_t *VernauxBuf = VerneedBuf + Verneed->vn_aux; - for (unsigned J = 0; J < Verneed->vn_cnt; ++J) { - const Elf_Vernaux *Vernaux = - reinterpret_cast(VernauxBuf); - - StringRef Name = StringTable.size() > Vernaux->vna_name - ? StringTable.drop_front(Vernaux->vna_name) - : ""; + Expected> V = + this->dumper()->getVersionDependencies(Sec); + if (!V) { + this->reportUniqueWarning(V.takeError()); + return; + } - OS << format(" 0x%04x: Name: %s Flags: %s Version: %u\n", - reinterpret_cast(Vernaux) - SecData.begin(), - Name.data(), versionFlagToString(Vernaux->vna_flags).c_str(), - (unsigned)Vernaux->vna_other); - VernauxBuf += Vernaux->vna_next; - } - VerneedBuf += Verneed->vn_next; + for (const VerNeed &VN : *V) { + OS << format(" 0x%04x: Version: %u File: %s Cnt: %u\n", VN.Offset, + VN.Version, VN.File.data(), VN.Cnt); + for (const VernAux &Aux : VN.AuxV) + OS << format(" 0x%04x: Name: %s Flags: %s Version: %u\n", Aux.Offset, + Aux.Name.data(), versionFlagToString(Aux.Flags).c_str(), + Aux.Other); } OS << '\n'; } @@ -5853,45 +5955,27 @@ void LLVMStyle::printVersionDependencySection(const ELFFile *Obj, if (!Sec) return; - const uint8_t *SecData = - reinterpret_cast(Obj->base() + Sec->sh_offset); - const Elf_Shdr *StrTabSec = - unwrapOrError(this->FileName, Obj->getSection(Sec->sh_link)); - StringRef StringTable = { - reinterpret_cast(Obj->base() + StrTabSec->sh_offset), - (size_t)StrTabSec->sh_size}; + Expected> V = + this->dumper()->getVersionDependencies(Sec); + if (!V) { + this->reportUniqueWarning(V.takeError()); + return; + } - const uint8_t *VerneedBuf = SecData; - unsigned VerneedNum = Sec->sh_info; - for (unsigned I = 0; I < VerneedNum; ++I) { - const Elf_Verneed *Verneed = - reinterpret_cast(VerneedBuf); + for (const 
VerNeed &VN : *V) { DictScope Entry(W, "Dependency"); - W.printNumber("Version", Verneed->vn_version); - W.printNumber("Count", Verneed->vn_cnt); - - StringRef FileName = StringTable.size() > Verneed->vn_file - ? StringTable.drop_front(Verneed->vn_file) - : ""; - W.printString("FileName", FileName.data()); + W.printNumber("Version", VN.Version); + W.printNumber("Count", VN.Cnt); + W.printString("FileName", VN.File.c_str()); - const uint8_t *VernauxBuf = VerneedBuf + Verneed->vn_aux; ListScope L(W, "Entries"); - for (unsigned J = 0; J < Verneed->vn_cnt; ++J) { - const Elf_Vernaux *Vernaux = - reinterpret_cast(VernauxBuf); + for (const VernAux &Aux : VN.AuxV) { DictScope Entry(W, "Entry"); - W.printNumber("Hash", Vernaux->vna_hash); - W.printFlags("Flags", Vernaux->vna_flags, makeArrayRef(SymVersionFlags)); - W.printNumber("Index", Vernaux->vna_other); - - StringRef Name = StringTable.size() > Vernaux->vna_name - ? StringTable.drop_front(Vernaux->vna_name) - : ""; - W.printString("Name", Name.data()); - VernauxBuf += Vernaux->vna_next; + W.printNumber("Hash", Aux.Hash); + W.printFlags("Flags", Aux.Flags, makeArrayRef(SymVersionFlags)); + W.printNumber("Index", Aux.Other); + W.printString("Name", Aux.Name.c_str()); } - VerneedBuf += Verneed->vn_next; } } From 902dc6c69ce7985427efa103a7c4099c372da6fa Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Fri, 29 Nov 2019 14:58:44 +0100 Subject: [PATCH 260/591] [clangd] Fix a regression issue in local rename. Summary: The regression is that we can't rename symbols in annonymous namespaces. 
Reviewers: ilya-biryukov Subscribers: MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D70853 --- clang-tools-extra/clangd/refactor/Rename.cpp | 18 ++++++++++++------ .../clangd/unittests/RenameTests.cpp | 9 ++++++++- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/clang-tools-extra/clangd/refactor/Rename.cpp b/clang-tools-extra/clangd/refactor/Rename.cpp index 6a3439cc06127..7d74641be7190 100644 --- a/clang-tools-extra/clangd/refactor/Rename.cpp +++ b/clang-tools-extra/clangd/refactor/Rename.cpp @@ -123,20 +123,26 @@ llvm::Optional renameable(const Decl &RenameDecl, if (RenameDecl.getParentFunctionOrMethod()) return None; + // Check whether the symbol being rename is indexable. + auto &ASTCtx = RenameDecl.getASTContext(); + bool MainFileIsHeader = isHeaderFile(MainFilePath, ASTCtx.getLangOpts()); + bool DeclaredInMainFile = + isInsideMainFile(RenameDecl.getBeginLoc(), ASTCtx.getSourceManager()); + bool IsMainFileOnly = true; + if (MainFileIsHeader) + // main file is a header, the symbol can't be main file only. + IsMainFileOnly = false; + else if (!DeclaredInMainFile) + IsMainFileOnly = false; bool IsIndexable = isa(RenameDecl) && SymbolCollector::shouldCollectSymbol( cast(RenameDecl), RenameDecl.getASTContext(), - SymbolCollector::Options(), CrossFile); + SymbolCollector::Options(), IsMainFileOnly); if (!IsIndexable) // If the symbol is not indexable, we disallow rename. return ReasonToReject::NonIndexable; if (!CrossFile) { - auto &ASTCtx = RenameDecl.getASTContext(); - const auto &SM = ASTCtx.getSourceManager(); - bool MainFileIsHeader = isHeaderFile(MainFilePath, ASTCtx.getLangOpts()); - bool DeclaredInMainFile = isInsideMainFile(RenameDecl.getBeginLoc(), SM); - if (!DeclaredInMainFile) // We are sure the symbol is used externally, bail out early. 
return ReasonToReject::UsedOutsideFile; diff --git a/clang-tools-extra/clangd/unittests/RenameTests.cpp b/clang-tools-extra/clangd/unittests/RenameTests.cpp index 0615272de372c..1ade0c0443bc8 100644 --- a/clang-tools-extra/clangd/unittests/RenameTests.cpp +++ b/clang-tools-extra/clangd/unittests/RenameTests.cpp @@ -450,13 +450,20 @@ TEST(RenameTest, Renameable) { )cpp", "used outside main file", HeaderFile, Index}, - {R"cpp(// disallow -- symbol is not indexable. + {R"cpp(// disallow -- symbol in annonymous namespace in header is not indexable. namespace { class Unin^dexable {}; } )cpp", "not eligible for indexing", HeaderFile, Index}, + {R"cpp(// allow -- symbol in annonymous namespace in non-header file is indexable. + namespace { + class [[F^oo]] {}; + } + )cpp", + nullptr, !HeaderFile, Index}, + {R"cpp(// disallow -- namespace symbol isn't supported namespace n^s {} )cpp", From 5c05b4a279f51d7eaa91f008bc5dc2155d98061a Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Sun, 1 Dec 2019 15:51:37 -0800 Subject: [PATCH 261/591] [Orc] Add setters for target options and features to JITTargetMachineBuilder. Also remove redundant feature initialization steps from the detectHost method. --- .../Orc/JITTargetMachineBuilder.h | 17 +++++++++++++++++ .../Orc/JITTargetMachineBuilder.cpp | 4 +--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h b/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h index bcbd72e68f154..c8c4ecdaff160 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h @@ -91,6 +91,12 @@ class JITTargetMachineBuilder { return *this; } + /// Set subtarget features. + JITTargetMachineBuilder &setFeatures(StringRef FeatureString) { + Features = SubtargetFeatures(FeatureString); + return *this; + } + /// Add subtarget features. 
JITTargetMachineBuilder & addFeatures(const std::vector &FeatureVec); @@ -101,6 +107,17 @@ class JITTargetMachineBuilder { /// Access subtarget features. const SubtargetFeatures &getFeatures() const { return Features; } + /// Set TargetOptions. + /// + /// Note: This operation will overwrite any previously configured options, + /// including EmulatedTLS and ExplicitEmulatedTLS which + /// the JITTargetMachineBuilder sets by default. Clients are responsible + /// for re-enabling these overwritten options. + JITTargetMachineBuilder &setOptions(TargetOptions Options) { + this->Options = std::move(Options); + return *this; + } + /// Access TargetOptions. TargetOptions &getOptions() { return Options; } diff --git a/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp b/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp index 1d3e6db913e21..114e81e41771b 100644 --- a/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp +++ b/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp @@ -28,14 +28,12 @@ Expected JITTargetMachineBuilder::detectHost() { // Retrieve host CPU name and sub-target features and add them to builder. // Relocation model, code model and codegen opt level are kept to default // values. - llvm::SubtargetFeatures SubtargetFeatures; llvm::StringMap FeatureMap; llvm::sys::getHostCPUFeatures(FeatureMap); for (auto &Feature : FeatureMap) - SubtargetFeatures.AddFeature(Feature.first(), Feature.second); + TMBuilder.getFeatures().AddFeature(Feature.first(), Feature.second); TMBuilder.setCPU(llvm::sys::getHostCPUName()); - TMBuilder.addFeatures(SubtargetFeatures.getFeatures()); return TMBuilder; } From 0e7ecc651a47f818abafdfe5f928923f789fe0bc Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Mon, 2 Dec 2019 01:40:54 -0800 Subject: [PATCH 262/591] [ExecutionEngine] Add a jitTargetAddressToFunction utility function. 
jitTargetAddressToFunction takes a JITTargetAddress and returns a pointer of the given function pointer type suitable for calling to invoke the function at the target address. jitTargetAddressToFunction currently behaves the same as jitTargetAddressToPointer, but in the near future will be updated to perform pointer signing on architectures that require it (e.g. arm64e). For this reason it should always be preferred when generating callable pointers for JIT'd functions. --- llvm/include/llvm/ExecutionEngine/JITSymbol.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/llvm/include/llvm/ExecutionEngine/JITSymbol.h b/llvm/include/llvm/ExecutionEngine/JITSymbol.h index c0f1ca4b98760..7a2a6cfa52037 100644 --- a/llvm/include/llvm/ExecutionEngine/JITSymbol.h +++ b/llvm/include/llvm/ExecutionEngine/JITSymbol.h @@ -41,6 +41,11 @@ class SymbolRef; using JITTargetAddress = uint64_t; /// Convert a JITTargetAddress to a pointer. +/// +/// Note: This is a raw cast of the address bit pattern to the given pointer +/// type. When casting to a function pointer in order to execute JIT'd code +/// jitTargetAddressToFunction should be preferred, as it will also perform +/// pointer signing on targets that require it. template T jitTargetAddressToPointer(JITTargetAddress Addr) { static_assert(std::is_pointer::value, "T must be a pointer type"); uintptr_t IntPtr = static_cast(Addr); @@ -48,6 +53,19 @@ template T jitTargetAddressToPointer(JITTargetAddress Addr) { return reinterpret_cast(IntPtr); } +/// Convert a JITTargetAddress to a callable function pointer. +/// +/// Casts the given address to a callable function pointer. This operation +/// will perform pointer signing for platforms that require it (e.g. arm64e). 
+template T jitTargetAddressToFunction(JITTargetAddress Addr) { + static_assert( + std::is_pointer::value && + std::is_function::type>::value, + "T must be a function pointer type"); + return jitTargetAddressToPointer(Addr); +} + +/// Convert a pointer to a JITTargetAddress. template JITTargetAddress pointerToJITTargetAddress(T *Ptr) { return static_cast(reinterpret_cast(Ptr)); } From ece8fed609061638bc867ecb74e179cb86426d60 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Mon, 2 Dec 2019 01:45:49 -0800 Subject: [PATCH 263/591] [ORC] Add a runAsMain utility function to ExecutionUtils. The runAsMain function takes a pointer to a function with a standard C main signature, int(*)(int, char*[]), and invokes it using the given arguments and program name. The arguments are copied into writable temporary storage as required by the C and C++ specifications, so runAsMain safe to use when calling main functions that modify their arguments in-place. This patch also uses the new runAsMain function to replace hand-rolled versions in lli, llvm-jitlink, and the SpeculativeJIT example. --- .../SpeculativeJIT/SpeculativeJIT.cpp | 15 +++-------- .../llvm/ExecutionEngine/Orc/ExecutionUtils.h | 11 ++++++++ .../ExecutionEngine/Orc/ExecutionUtils.cpp | 26 +++++++++++++++++++ llvm/tools/lli/lli.cpp | 15 ++++------- llvm/tools/llvm-jitlink/llvm-jitlink.cpp | 23 +++------------- 5 files changed, 48 insertions(+), 42 deletions(-) diff --git a/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp b/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp index 1fd1fc92a73f9..5a0ad376bcd53 100644 --- a/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp +++ b/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp @@ -176,20 +176,11 @@ int main(int argc, char *argv[]) { ThreadSafeModule(std::move(M), std::move(Ctx)))); } - // Build an argv array for the JIT'd main. 
- std::vector ArgV; - ArgV.push_back(argv[0]); - for (const auto &InputArg : InputArgv) - ArgV.push_back(InputArg.data()); - ArgV.push_back(nullptr); - - // Look up the JIT'd main, cast it to a function pointer, then call it. - auto MainSym = ExitOnErr(SJ->lookup("main")); - int (*Main)(int, const char *[]) = - (int (*)(int, const char *[]))MainSym.getAddress(); + auto Main = + jitTargetAddressToFunction(MainSym.getAddress()); - Main(ArgV.size() - 1, ArgV.data()); + return runAsMain(Main, InputArgv, StringRef(InputFiles.front())); return 0; } diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h index 7d051ed990635..c797dbbbdfd97 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h @@ -40,6 +40,17 @@ namespace orc { class ObjectLayer; +/// Run a main function, returning the result. +/// +/// If the optional ProgramName argument is given then it will be inserted +/// before the strings in Args as the first argument to the called function. +/// +/// It is legal to have an empty argument list and no program name, however +/// many main functions will expect a name argument at least, and will fail +/// if none is provided. +int runAsMain(int (*Main)(int, char *[]), ArrayRef Args, + Optional ProgramName = None); + /// This iterator provides a convenient way to iterate over the elements /// of an llvm.global_ctors/llvm.global_dtors instance. 
/// diff --git a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp index 4a3482242dbc7..0a3fef207ac2f 100644 --- a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp @@ -19,6 +19,32 @@ namespace llvm { namespace orc { +int runAsMain(int (*Main)(int, char *[]), ArrayRef Args, + Optional ProgramName) { + std::vector> ArgVStorage; + std::vector ArgV; + + ArgVStorage.reserve(Args.size() + (ProgramName ? 1 : 0)); + ArgV.reserve(Args.size() + 1 + (ProgramName ? 1 : 0)); + + if (ProgramName) { + ArgVStorage.push_back(std::make_unique(ProgramName->size() + 1)); + llvm::copy(*ProgramName, &ArgVStorage.back()[0]); + ArgVStorage.back()[ProgramName->size()] = '\0'; + ArgV.push_back(ArgVStorage.back().get()); + } + + for (auto &Arg : Args) { + ArgVStorage.push_back(std::make_unique(Arg.size() + 1)); + llvm::copy(Arg, &ArgVStorage.back()[0]); + ArgVStorage.back()[Arg.size()] = '\0'; + ArgV.push_back(ArgVStorage.back().get()); + } + ArgV.push_back(nullptr); + + return Main(Args.size(), ArgV.data()); +} + CtorDtorIterator::CtorDtorIterator(const GlobalVariable *GV, bool End) : InitList( GV ? dyn_cast_or_null(GV->getInitializer()) : nullptr), diff --git a/llvm/tools/lli/lli.cpp b/llvm/tools/lli/lli.cpp index ccad067214141..9aaeef0859295 100644 --- a/llvm/tools/lli/lli.cpp +++ b/llvm/tools/lli/lli.cpp @@ -871,16 +871,11 @@ int runOrcLazyJIT(const char *ProgName) { // Run main. 
auto MainSym = ExitOnErr(J->lookup("main")); - typedef int (*MainFnPtr)(int, const char *[]); - std::vector ArgV; - for (auto &Arg : Args) - ArgV.push_back(Arg.c_str()); - ArgV.push_back(nullptr); - - int ArgC = ArgV.size() - 1; - auto Main = - reinterpret_cast(static_cast(MainSym.getAddress())); - auto Result = Main(ArgC, (const char **)ArgV.data()); + + typedef int (*MainFnPtr)(int, char *[]); + auto Result = orc::runAsMain( + jitTargetAddressToFunction(MainSym.getAddress()), Args, + StringRef("lli")); // Wait for -entry-point threads. for (auto &AltEntryThread : AltEntryThreads) diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp index 003a333d9563d..0e0953d37b673 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -769,25 +769,6 @@ static Expected getMainEntryPoint(Session &S) { return S.ES.lookup(S.JDSearchOrder, EntryPointName); } -Expected runEntryPoint(Session &S, JITEvaluatedSymbol EntryPoint) { - assert(EntryPoint.getAddress() && "Entry point address should not be null"); - - constexpr const char *JITProgramName = ""; - auto PNStorage = std::make_unique(strlen(JITProgramName) + 1); - strcpy(PNStorage.get(), JITProgramName); - - std::vector EntryPointArgs; - EntryPointArgs.push_back(PNStorage.get()); - for (auto &InputArg : InputArgv) - EntryPointArgs.push_back(InputArg.data()); - EntryPointArgs.push_back(nullptr); - - using MainTy = int (*)(int, const char *[]); - MainTy EntryPointPtr = reinterpret_cast(EntryPoint.getAddress()); - - return EntryPointPtr(EntryPointArgs.size() - 1, EntryPointArgs.data()); -} - struct JITLinkTimers { TimerGroup JITLinkTG{"llvm-jitlink timers", "timers for llvm-jitlink phases"}; Timer LoadObjectsTimer{"load", "time to load/add object files", JITLinkTG}; @@ -841,8 +822,10 @@ int main(int argc, char *argv[]) { int Result = 0; { + using MainTy = int (*)(int, char *[]); + auto EntryFn = 
jitTargetAddressToFunction(EntryPoint.getAddress()); TimeRegion TR(Timers ? &Timers->RunTimer : nullptr); - Result = ExitOnErr(runEntryPoint(S, EntryPoint)); + Result = runAsMain(EntryFn, InputArgv, StringRef(InputFiles.front())); } return Result; From 7eecf2b872e506927f59b6a1f4a8546d8baaa700 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Fri, 29 Nov 2019 11:57:13 +0300 Subject: [PATCH 264/591] [llvm-readelf/llvm-readobj] - Check the version of SHT_GNU_verneed section entries. It is a follow-up for D70826 and it is similar to D70810. SHT_GNU_verneed contains the following fields: `vn_version`: Version of structure. This value is currently set to 1, and will be reset if the versioning implementation is incompatibly altered. (https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/symversion.html) We should check it for correctness. Differential revision: https://reviews.llvm.org/D70842 --- .../llvm-readobj/elf-verneed-invalid.test | 31 +++++++++++++++++++ llvm/tools/llvm-readobj/ELFDumper.cpp | 6 ++++ 2 files changed, 37 insertions(+) diff --git a/llvm/test/tools/llvm-readobj/elf-verneed-invalid.test b/llvm/test/tools/llvm-readobj/elf-verneed-invalid.test index 53f8562989d5b..88039f89c593c 100644 --- a/llvm/test/tools/llvm-readobj/elf-verneed-invalid.test +++ b/llvm/test/tools/llvm-readobj/elf-verneed-invalid.test @@ -520,3 +520,34 @@ Sections: Content: "0100010001000000110000000000000000000000" DynamicSymbols: - Name: foo + +## Check how we handle the case when a dependency definition entry has an unsupported version. 
+ +# RUN: yaml2obj %s --docnum=12 -o %t12 +# RUN: llvm-readobj -V %t12 2>&1 | FileCheck %s --check-prefix=UNSUPPORTED-VERSION -DFILE=%t12 +# RUN: llvm-readelf -V %t12 2>&1 | FileCheck %s --check-prefix=UNSUPPORTED-VERSION -DFILE=%t12 + +# UNSUPPORTED-VERSION: warning: '[[FILE]]': unable to dump SHT_GNU_verneed section with index 1: version 65278 is not yet supported + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .gnu.version_r + Type: SHT_GNU_verneed + Flags: [ SHF_ALLOC ] + Info: 1 + Link: .dynstr + Dependencies: + - Version: 0xfefe + File: foo + Entries: + - Name: 'foo' + Hash: 0 + Flags: 0 + Other: 0 +DynamicSymbols: + - Name: foo diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 1031e0a181dbf..adc3ae7dcc833 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -526,6 +526,12 @@ ELFDumper::getVersionDependencies(const Elf_Shdr *Sec) const { ": found a misaligned version dependency entry at offset 0x" + Twine::utohexstr(VerneedBuf - Start)); + unsigned Version = *reinterpret_cast(VerneedBuf); + if (Version != 1) + return createError("unable to dump SHT_GNU_verneed section with index " + + Twine(SecNdx) + ": version " + Twine(Version) + + " is not yet supported"); + const Elf_Verneed *Verneed = reinterpret_cast(VerneedBuf); From a9d6b0e5444741d08ff1df7cf71d1559e7fefc1f Mon Sep 17 00:00:00 2001 From: Bjorn Pettersson Date: Thu, 28 Nov 2019 23:18:28 +0100 Subject: [PATCH 265/591] [InstCombine] Fix big-endian miscompile of (bitcast (zext/trunc (bitcast))) Summary: optimizeVectorResize is rewriting patterns like: %1 = bitcast vector %src to integer %2 = trunc/zext %1 %dst = bitcast %2 to vector Since bitcasting between integer an vector types gives different integer values depending on endianness, we need to take endianness into account. 
As it happens the old implementation only produced the correct result for little endian targets. Fixes: https://bugs.llvm.org/show_bug.cgi?id=44178 Reviewers: spatel, lattner, lebedev.ri Reviewed By: spatel, lebedev.ri Subscribers: lebedev.ri, hiraditya, uabelho, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70844 --- .../InstCombine/InstCombineCasts.cpp | 79 +++++++++++++------ llvm/test/Transforms/InstCombine/cast.ll | 33 +++++--- 2 files changed, 80 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 0390368c4bb40..078a80de2df4a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -18,6 +18,7 @@ #include "llvm/IR/DIBuilder.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/KnownBits.h" +#include using namespace llvm; using namespace PatternMatch; @@ -1820,12 +1821,24 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { } /// This input value (which is known to have vector type) is being zero extended -/// or truncated to the specified vector type. +/// or truncated to the specified vector type. Since the zext/trunc is done +/// using an integer type, we have a (bitcast(cast(bitcast))) pattern, +/// endianness will impact which end of the vector that is extended or +/// truncated. +/// +/// A vector is always stored with index 0 at the lowest address, which +/// corresponds to the most significant bits for a big endian stored integer and +/// the least significant bits for little endian. A trunc/zext of an integer +/// impacts the big end of the integer. Thus, we need to add/remove elements at +/// the front of the vector for big endian targets, and the back of the vector +/// for little endian targets. +/// /// Try to replace it with a shuffle (and vector/vector bitcast) if possible. 
/// /// The source and destination vector types may have different element types. -static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy, - InstCombiner &IC) { +static Instruction *optimizeVectorResizeWithIntegerBitCasts(Value *InVal, + VectorType *DestTy, + InstCombiner &IC) { // We can only do this optimization if the output is a multiple of the input // element size, or the input is a multiple of the output element size. // Convert the input type to have the same element type as the output. @@ -1844,31 +1857,53 @@ static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy, InVal = IC.Builder.CreateBitCast(InVal, SrcTy); } + bool IsBigEndian = IC.getDataLayout().isBigEndian(); + unsigned SrcElts = SrcTy->getNumElements(); + unsigned DestElts = DestTy->getNumElements(); + + assert(SrcElts != DestElts && "Element counts should be different."); + // Now that the element types match, get the shuffle mask and RHS of the // shuffle to use, which depends on whether we're increasing or decreasing the // size of the input. - SmallVector ShuffleMask; + SmallVector ShuffleMaskStorage; + ArrayRef ShuffleMask; Value *V2; - if (SrcTy->getNumElements() > DestTy->getNumElements()) { - // If we're shrinking the number of elements, just shuffle in the low - // elements from the input and use undef as the second shuffle input. - V2 = UndefValue::get(SrcTy); - for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i) - ShuffleMask.push_back(i); + // Produce an identify shuffle mask for the src vector. + ShuffleMaskStorage.resize(SrcElts); + std::iota(ShuffleMaskStorage.begin(), ShuffleMaskStorage.end(), 0); + if (SrcElts > DestElts) { + // If we're shrinking the number of elements (rewriting an integer + // truncate), just shuffle in the elements corresponding to the least + // significant bits from the input and use undef as the second shuffle + // input. 
+ V2 = UndefValue::get(SrcTy); + // Make sure the shuffle mask selects the "least significant bits" by + // keeping elements from back of the src vector for big endian, and from the + // front for little endian. + ShuffleMask = ShuffleMaskStorage; + if (IsBigEndian) + ShuffleMask = ShuffleMask.take_back(DestElts); + else + ShuffleMask = ShuffleMask.take_front(DestElts); } else { - // If we're increasing the number of elements, shuffle in all of the - // elements from InVal and fill the rest of the result elements with zeros - // from a constant zero. + // If we're increasing the number of elements (rewriting an integer zext), + // shuffle in all of the elements from InVal. Fill the rest of the result + // elements with zeros from a constant zero. V2 = Constant::getNullValue(SrcTy); - unsigned SrcElts = SrcTy->getNumElements(); - for (unsigned i = 0, e = SrcElts; i != e; ++i) - ShuffleMask.push_back(i); - - // The excess elements reference the first element of the zero input. - for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i) - ShuffleMask.push_back(SrcElts); + // Use first elt from V2 when indicating zero in the shuffle mask. + uint32_t NullElt = SrcElts; + // Extend with null values in the "most significant bits" by adding elements + // in front of the src vector for big endian, and at the back for little + // endian. 
+ unsigned DeltaElts = DestElts - SrcElts; + if (IsBigEndian) + ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt); + else + ShuffleMaskStorage.append(DeltaElts, NullElt); + ShuffleMask = ShuffleMaskStorage; } return new ShuffleVectorInst(InVal, V2, @@ -2375,8 +2410,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { CastInst *SrcCast = cast(Src); if (BitCastInst *BCIn = dyn_cast(SrcCast->getOperand(0))) if (isa(BCIn->getOperand(0)->getType())) - if (Instruction *I = optimizeVectorResize(BCIn->getOperand(0), - cast(DestTy), *this)) + if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts( + BCIn->getOperand(0), cast(DestTy), *this)) return I; } diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll index 66eb3904ebb78..d85286b46029a 100644 --- a/llvm/test/Transforms/InstCombine/cast.ll +++ b/llvm/test/Transforms/InstCombine/cast.ll @@ -823,9 +823,13 @@ define i64 @test59(i8 %A, i8 %B) { } define <3 x i32> @test60(<4 x i32> %call4) { -; ALL-LABEL: @test60( -; ALL-NEXT: [[P10:%.*]] = shufflevector <4 x i32> [[CALL4:%.*]], <4 x i32> undef, <3 x i32> -; ALL-NEXT: ret <3 x i32> [[P10]] +; BE-LABEL: @test60( +; BE-NEXT: [[P10:%.*]] = shufflevector <4 x i32> [[CALL4:%.*]], <4 x i32> undef, <3 x i32> +; BE-NEXT: ret <3 x i32> [[P10]] +; +; LE-LABEL: @test60( +; LE-NEXT: [[P10:%.*]] = shufflevector <4 x i32> [[CALL4:%.*]], <4 x i32> undef, <3 x i32> +; LE-NEXT: ret <3 x i32> [[P10]] ; %p11 = bitcast <4 x i32> %call4 to i128 %p9 = trunc i128 %p11 to i96 @@ -835,9 +839,13 @@ define <3 x i32> @test60(<4 x i32> %call4) { } define <4 x i32> @test61(<3 x i32> %call4) { -; ALL-LABEL: @test61( -; ALL-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[CALL4:%.*]], <3 x i32> , <4 x i32> -; ALL-NEXT: ret <4 x i32> [[P10]] +; BE-LABEL: @test61( +; BE-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[CALL4:%.*]], <3 x i32> , <4 x i32> +; BE-NEXT: ret <4 x i32> [[P10]] +; +; LE-LABEL: @test61( +; LE-NEXT: 
[[P10:%.*]] = shufflevector <3 x i32> [[CALL4:%.*]], <3 x i32> , <4 x i32> +; LE-NEXT: ret <4 x i32> [[P10]] ; %p11 = bitcast <3 x i32> %call4 to i96 %p9 = zext i96 %p11 to i128 @@ -846,10 +854,15 @@ define <4 x i32> @test61(<3 x i32> %call4) { } define <4 x i32> @test62(<3 x float> %call4) { -; ALL-LABEL: @test62( -; ALL-NEXT: [[TMP1:%.*]] = bitcast <3 x float> [[CALL4:%.*]] to <3 x i32> -; ALL-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> , <4 x i32> -; ALL-NEXT: ret <4 x i32> [[P10]] +; BE-LABEL: @test62( +; BE-NEXT: [[TMP1:%.*]] = bitcast <3 x float> [[CALL4:%.*]] to <3 x i32> +; BE-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> , <4 x i32> +; BE-NEXT: ret <4 x i32> [[P10]] +; +; LE-LABEL: @test62( +; LE-NEXT: [[TMP1:%.*]] = bitcast <3 x float> [[CALL4:%.*]] to <3 x i32> +; LE-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> , <4 x i32> +; LE-NEXT: ret <4 x i32> [[P10]] ; %p11 = bitcast <3 x float> %call4 to i96 %p9 = zext i96 %p11 to i128 From 44b9942898c7167ed95cccef4c3da3d2113e11e8 Mon Sep 17 00:00:00 2001 From: "Wang, Pengfei" Date: Sun, 1 Dec 2019 13:35:53 +0800 Subject: [PATCH 266/591] [X86] Add initialization of MXCSR in llvm-exegesis Summary: This patch is used to initialize the new added register MXCSR. 
Reviewers: craig.topper, RKSimon Subscribers: tschuett, courbet, llvm-commits, LiuChen3 Tags: #llvm Differential Revision: https://reviews.llvm.org/D70874 --- .../tools/llvm-exegesis/X86/uops-VFMADDSS4rm.s | 3 +++ llvm/tools/llvm-exegesis/lib/X86/Target.cpp | 17 +++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/llvm/test/tools/llvm-exegesis/X86/uops-VFMADDSS4rm.s b/llvm/test/tools/llvm-exegesis/X86/uops-VFMADDSS4rm.s index 84a928cb23d8a..c629351690cc2 100644 --- a/llvm/test/tools/llvm-exegesis/X86/uops-VFMADDSS4rm.s +++ b/llvm/test/tools/llvm-exegesis/X86/uops-VFMADDSS4rm.s @@ -5,3 +5,6 @@ CHECK: mode: uops CHECK-NEXT: key: CHECK-NEXT: instructions: CHECK-NEXT: VFMADDSS4rm +CHECK: register_initial_values: +# FIXME: This will be changed to CHECK by the following patch that modeling MXCSR to VFMADDSS. +CHECK-NOT: MXCSR diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp index 6cf3d465dd9a6..974b2c938c3ba 100644 --- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp @@ -439,6 +439,8 @@ struct ConstantInliner { std::vector popFlagAndFinalize(); + std::vector loadMXCSRAndFinalize(bool HasAVX); + private: ConstantInliner &add(const MCInst &Inst) { Instructions.push_back(Inst); @@ -499,6 +501,19 @@ std::vector ConstantInliner::popFlagAndFinalize() { return std::move(Instructions); } +std::vector ConstantInliner::loadMXCSRAndFinalize(bool HasAVX) { + add(allocateStackSpace(4)); + add(fillStackSpace(X86::MOV32mi, 0, 0x1f80)); // Mask all FP exceptions + add(MCInstBuilder(HasAVX ? 
X86::VLDMXCSR : X86::LDMXCSR) + // Address = ESP + .addReg(X86::RSP) // BaseReg + .addImm(1) // ScaleAmt + .addReg(0) // IndexReg + .addImm(0) // Disp + .addReg(0)); // Segment + return std::move(Instructions); +} + void ConstantInliner::initStack(unsigned Bytes) { assert(Constant_.getBitWidth() <= Bytes * 8 && "Value does not have the correct size"); @@ -699,6 +714,8 @@ std::vector ExegesisX86Target::setRegTo(const MCSubtargetInfo &STI, } if (Reg == X86::EFLAGS) return CI.popFlagAndFinalize(); + if (Reg == X86::MXCSR) + return CI.loadMXCSRAndFinalize(STI.getFeatureBits()[X86::FeatureAVX]); return {}; // Not yet implemented. } From e9e1daf2b9e800c00a3c08db53650c1569288a1b Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 2 Dec 2019 10:29:01 +0000 Subject: [PATCH 267/591] [ARM] Remove VHADD patterns These instructions do not work quite like I expected them to. They perform the addition and then shift in a higher precision integer, so do not match up with the patterns that we added. For example with s8s, adding 100 and 100 should wrap leaving the shift to work on a negative number. VHADD will instead do the arithmetic in higher precision, giving 100 overall. The vhadd gives a "better" result, but not one that matches up with the input. I am just removing the patterns here. We might be able to re-add them in the future by checking for wrap flags or changing bitwidths. But for the moment just remove them to remove the problem cases. 
--- llvm/lib/Target/ARM/ARMInstrMVE.td | 54 ------------------ llvm/test/CodeGen/Thumb2/mve-vhaddsub.ll | 72 ++++++++++++++++-------- 2 files changed, 48 insertions(+), 78 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index df38503458987..cc3a8ee77d741 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1785,60 +1785,6 @@ def MVE_VHSUBu8 : MVE_VHSUB<"u8", 0b1, 0b00>; def MVE_VHSUBu16 : MVE_VHSUB<"u16", 0b1, 0b01>; def MVE_VHSUBu32 : MVE_VHSUB<"u32", 0b1, 0b10>; -let Predicates = [HasMVEInt] in { - def : Pat<(v16i8 (ARMvshrsImm - (v16i8 (add (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)), - (v16i8 (MVE_VHADDs8 - (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>; - def : Pat<(v8i16 (ARMvshrsImm - (v8i16 (add (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)), - (v8i16 (MVE_VHADDs16 - (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>; - def : Pat<(v4i32 (ARMvshrsImm - (v4i32 (add (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)), - (v4i32 (MVE_VHADDs32 - (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>; - - def : Pat<(v16i8 (ARMvshruImm - (v16i8 (add (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)), - (v16i8 (MVE_VHADDu8 - (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>; - def : Pat<(v8i16 (ARMvshruImm - (v8i16 (add (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)), - (v8i16 (MVE_VHADDu16 - (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>; - def : Pat<(v4i32 (ARMvshruImm - (v4i32 (add (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)), - (v4i32 (MVE_VHADDu32 - (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>; - - def : Pat<(v16i8 (ARMvshrsImm - (v16i8 (sub (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)), - (v16i8 (MVE_VHSUBs8 - (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>; - def : Pat<(v8i16 (ARMvshrsImm - (v8i16 (sub (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)), - (v8i16 (MVE_VHSUBs16 - (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>; - def : Pat<(v4i32 (ARMvshrsImm - (v4i32 (sub (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)), - (v4i32 (MVE_VHSUBs32 - (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>; - - def : Pat<(v16i8 
(ARMvshruImm - (v16i8 (sub (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), 1)), - (v16i8 (MVE_VHSUBu8 - (v16i8 MQPR:$v1), (v16i8 MQPR:$v2)))>; - def : Pat<(v8i16 (ARMvshruImm - (v8i16 (sub (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), 1)), - (v8i16 (MVE_VHSUBu16 - (v8i16 MQPR:$v1), (v8i16 MQPR:$v2)))>; - def : Pat<(v4i32 (ARMvshruImm - (v4i32 (sub (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), 1)), - (v4i32 (MVE_VHSUBu32 - (v4i32 MQPR:$v1), (v4i32 MQPR:$v2)))>; -} - class MVE_VDUP pattern=[]> : MVE_p<(outs MQPR:$Qd), (ins rGPR:$Rt), NoItinerary, "vdup", suffix, "$Qd, $Rt", vpred_r, "", pattern> { diff --git a/llvm/test/CodeGen/Thumb2/mve-vhaddsub.ll b/llvm/test/CodeGen/Thumb2/mve-vhaddsub.ll index 19979f203f16b..83534e2c3e833 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vhaddsub.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vhaddsub.ll @@ -4,7 +4,8 @@ define arm_aapcs_vfpcc <16 x i8> @add_ashr_v16i8(<16 x i8> %src1, <16 x i8> %src2) { ; CHECK-LABEL: add_ashr_v16i8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhadd.s8 q0, q0, q1 +; CHECK-NEXT: vadd.i8 q0, q0, q1 +; CHECK-NEXT: vshr.s8 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = add <16 x i8> %src1, %src2 @@ -15,7 +16,8 @@ entry: define arm_aapcs_vfpcc <8 x i16> @add_ashr_v8i16(<8 x i16> %src1, <8 x i16> %src2) { ; CHECK-LABEL: add_ashr_v8i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhadd.s16 q0, q0, q1 +; CHECK-NEXT: vadd.i16 q0, q0, q1 +; CHECK-NEXT: vshr.s16 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = add <8 x i16> %src1, %src2 @@ -26,7 +28,8 @@ entry: define arm_aapcs_vfpcc <4 x i32> @add_ashr_v4i32(<4 x i32> %src1, <4 x i32> %src2) { ; CHECK-LABEL: add_ashr_v4i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhadd.s32 q0, q0, q1 +; CHECK-NEXT: vadd.i32 q0, q0, q1 +; CHECK-NEXT: vshr.s32 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = add nsw <4 x i32> %src1, %src2 @@ -37,7 +40,8 @@ entry: define arm_aapcs_vfpcc <16 x i8> @add_lshr_v16i8(<16 x i8> %src1, <16 x i8> %src2) { ; CHECK-LABEL: add_lshr_v16i8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhadd.u8 q0, 
q0, q1 +; CHECK-NEXT: vadd.i8 q0, q0, q1 +; CHECK-NEXT: vshr.u8 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = add <16 x i8> %src1, %src2 @@ -48,7 +52,8 @@ entry: define arm_aapcs_vfpcc <8 x i16> @add_lshr_v8i16(<8 x i16> %src1, <8 x i16> %src2) { ; CHECK-LABEL: add_lshr_v8i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhadd.u16 q0, q0, q1 +; CHECK-NEXT: vadd.i16 q0, q0, q1 +; CHECK-NEXT: vshr.u16 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = add <8 x i16> %src1, %src2 @@ -59,7 +64,8 @@ entry: define arm_aapcs_vfpcc <4 x i32> @add_lshr_v4i32(<4 x i32> %src1, <4 x i32> %src2) { ; CHECK-LABEL: add_lshr_v4i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhadd.u32 q0, q0, q1 +; CHECK-NEXT: vadd.i32 q0, q0, q1 +; CHECK-NEXT: vshr.u32 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = add nsw <4 x i32> %src1, %src2 @@ -70,7 +76,8 @@ entry: define arm_aapcs_vfpcc <16 x i8> @sub_ashr_v16i8(<16 x i8> %src1, <16 x i8> %src2) { ; CHECK-LABEL: sub_ashr_v16i8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhsub.s8 q0, q0, q1 +; CHECK-NEXT: vsub.i8 q0, q0, q1 +; CHECK-NEXT: vshr.s8 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = sub <16 x i8> %src1, %src2 @@ -81,7 +88,8 @@ entry: define arm_aapcs_vfpcc <8 x i16> @sub_ashr_v8i16(<8 x i16> %src1, <8 x i16> %src2) { ; CHECK-LABEL: sub_ashr_v8i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhsub.s16 q0, q0, q1 +; CHECK-NEXT: vsub.i16 q0, q0, q1 +; CHECK-NEXT: vshr.s16 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = sub <8 x i16> %src1, %src2 @@ -92,7 +100,8 @@ entry: define arm_aapcs_vfpcc <4 x i32> @sub_ashr_v4i32(<4 x i32> %src1, <4 x i32> %src2) { ; CHECK-LABEL: sub_ashr_v4i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhsub.s32 q0, q0, q1 +; CHECK-NEXT: vsub.i32 q0, q0, q1 +; CHECK-NEXT: vshr.s32 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = sub nsw <4 x i32> %src1, %src2 @@ -103,7 +112,8 @@ entry: define arm_aapcs_vfpcc <16 x i8> @sub_lshr_v16i8(<16 x i8> %src1, <16 x i8> %src2) { ; CHECK-LABEL: sub_lshr_v16i8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhsub.u8 
q0, q0, q1 +; CHECK-NEXT: vsub.i8 q0, q0, q1 +; CHECK-NEXT: vshr.u8 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = sub <16 x i8> %src1, %src2 @@ -114,7 +124,8 @@ entry: define arm_aapcs_vfpcc <8 x i16> @sub_lshr_v8i16(<8 x i16> %src1, <8 x i16> %src2) { ; CHECK-LABEL: sub_lshr_v8i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhsub.u16 q0, q0, q1 +; CHECK-NEXT: vsub.i16 q0, q0, q1 +; CHECK-NEXT: vshr.u16 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = sub <8 x i16> %src1, %src2 @@ -125,7 +136,8 @@ entry: define arm_aapcs_vfpcc <4 x i32> @sub_lshr_v4i32(<4 x i32> %src1, <4 x i32> %src2) { ; CHECK-LABEL: sub_lshr_v4i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhsub.u32 q0, q0, q1 +; CHECK-NEXT: vsub.i32 q0, q0, q1 +; CHECK-NEXT: vshr.u32 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = sub nsw <4 x i32> %src1, %src2 @@ -140,7 +152,8 @@ define arm_aapcs_vfpcc <16 x i8> @add_sdiv_v16i8(<16 x i8> %src1, <16 x i8> %src ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vadd.i8 q0, q0, q1 ; CHECK-NEXT: vshr.u8 q1, q0, #7 -; CHECK-NEXT: vhadd.s8 q0, q0, q1 +; CHECK-NEXT: vadd.i8 q0, q0, q1 +; CHECK-NEXT: vshr.s8 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = add <16 x i8> %src1, %src2 @@ -153,7 +166,8 @@ define arm_aapcs_vfpcc <8 x i16> @add_sdiv_v8i16(<8 x i16> %src1, <8 x i16> %src ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vadd.i16 q0, q0, q1 ; CHECK-NEXT: vshr.u16 q1, q0, #15 -; CHECK-NEXT: vhadd.s16 q0, q0, q1 +; CHECK-NEXT: vadd.i16 q0, q0, q1 +; CHECK-NEXT: vshr.s16 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = add <8 x i16> %src1, %src2 @@ -166,7 +180,8 @@ define arm_aapcs_vfpcc <4 x i32> @add_sdiv_v4i32(<4 x i32> %src1, <4 x i32> %src ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vadd.i32 q0, q0, q1 ; CHECK-NEXT: vshr.u32 q1, q0, #31 -; CHECK-NEXT: vhadd.s32 q0, q0, q1 +; CHECK-NEXT: vadd.i32 q0, q0, q1 +; CHECK-NEXT: vshr.s32 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = add nsw <4 x i32> %src1, %src2 @@ -177,7 +192,8 @@ entry: define arm_aapcs_vfpcc <16 x i8> @add_udiv_v16i8(<16 x i8> %src1, <16 x i8> 
%src2) { ; CHECK-LABEL: add_udiv_v16i8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhadd.u8 q0, q0, q1 +; CHECK-NEXT: vadd.i8 q0, q0, q1 +; CHECK-NEXT: vshr.u8 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = add <16 x i8> %src1, %src2 @@ -188,7 +204,8 @@ entry: define arm_aapcs_vfpcc <8 x i16> @add_udiv_v8i16(<8 x i16> %src1, <8 x i16> %src2) { ; CHECK-LABEL: add_udiv_v8i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhadd.u16 q0, q0, q1 +; CHECK-NEXT: vadd.i16 q0, q0, q1 +; CHECK-NEXT: vshr.u16 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = add <8 x i16> %src1, %src2 @@ -199,7 +216,8 @@ entry: define arm_aapcs_vfpcc <4 x i32> @add_udiv_v4i32(<4 x i32> %src1, <4 x i32> %src2) { ; CHECK-LABEL: add_udiv_v4i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhadd.u32 q0, q0, q1 +; CHECK-NEXT: vadd.i32 q0, q0, q1 +; CHECK-NEXT: vshr.u32 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = add nsw <4 x i32> %src1, %src2 @@ -212,7 +230,8 @@ define arm_aapcs_vfpcc <16 x i8> @sub_sdiv_v16i8(<16 x i8> %src1, <16 x i8> %src ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vsub.i8 q0, q0, q1 ; CHECK-NEXT: vshr.u8 q1, q0, #7 -; CHECK-NEXT: vhadd.s8 q0, q0, q1 +; CHECK-NEXT: vadd.i8 q0, q0, q1 +; CHECK-NEXT: vshr.s8 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = sub <16 x i8> %src1, %src2 @@ -225,7 +244,8 @@ define arm_aapcs_vfpcc <8 x i16> @sub_sdiv_v8i16(<8 x i16> %src1, <8 x i16> %src ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vsub.i16 q0, q0, q1 ; CHECK-NEXT: vshr.u16 q1, q0, #15 -; CHECK-NEXT: vhadd.s16 q0, q0, q1 +; CHECK-NEXT: vadd.i16 q0, q0, q1 +; CHECK-NEXT: vshr.s16 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = sub <8 x i16> %src1, %src2 @@ -238,7 +258,8 @@ define arm_aapcs_vfpcc <4 x i32> @sub_sdiv_v4i32(<4 x i32> %src1, <4 x i32> %src ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vsub.i32 q0, q0, q1 ; CHECK-NEXT: vshr.u32 q1, q0, #31 -; CHECK-NEXT: vhadd.s32 q0, q0, q1 +; CHECK-NEXT: vadd.i32 q0, q0, q1 +; CHECK-NEXT: vshr.s32 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = sub nsw <4 x i32> %src1, %src2 @@ -249,7 
+270,8 @@ entry: define arm_aapcs_vfpcc <16 x i8> @sub_udiv_v16i8(<16 x i8> %src1, <16 x i8> %src2) { ; CHECK-LABEL: sub_udiv_v16i8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhsub.u8 q0, q0, q1 +; CHECK-NEXT: vsub.i8 q0, q0, q1 +; CHECK-NEXT: vshr.u8 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = sub <16 x i8> %src1, %src2 @@ -260,7 +282,8 @@ entry: define arm_aapcs_vfpcc <8 x i16> @sub_udiv_v8i16(<8 x i16> %src1, <8 x i16> %src2) { ; CHECK-LABEL: sub_udiv_v8i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhsub.u16 q0, q0, q1 +; CHECK-NEXT: vsub.i16 q0, q0, q1 +; CHECK-NEXT: vshr.u16 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = sub <8 x i16> %src1, %src2 @@ -271,7 +294,8 @@ entry: define arm_aapcs_vfpcc <4 x i32> @sub_udiv_v4i32(<4 x i32> %src1, <4 x i32> %src2) { ; CHECK-LABEL: sub_udiv_v4i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vhsub.u32 q0, q0, q1 +; CHECK-NEXT: vsub.i32 q0, q0, q1 +; CHECK-NEXT: vshr.u32 q0, q0, #1 ; CHECK-NEXT: bx lr entry: %0 = sub nsw <4 x i32> %src1, %src2 From 160a5045c699ac523eac3c7a1984705c3e86720e Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Mon, 2 Dec 2019 11:08:10 +0100 Subject: [PATCH 268/591] [lldb][NFC] Add 'breakpoint command list' test The command has zero test coverage and I'll have to touch the code formatting the output commands, so let's start by adding a test for it. 
--- .../command/list/TestBreakpointCommandList.py | 44 +++++++++++++++++++ .../commands/breakpoint/command/list/a.yaml | 18 ++++++++ 2 files changed, 62 insertions(+) create mode 100644 lldb/packages/Python/lldbsuite/test/commands/breakpoint/command/list/TestBreakpointCommandList.py create mode 100644 lldb/packages/Python/lldbsuite/test/commands/breakpoint/command/list/a.yaml diff --git a/lldb/packages/Python/lldbsuite/test/commands/breakpoint/command/list/TestBreakpointCommandList.py b/lldb/packages/Python/lldbsuite/test/commands/breakpoint/command/list/TestBreakpointCommandList.py new file mode 100644 index 0000000000000..f1a8656a73b55 --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/commands/breakpoint/command/list/TestBreakpointCommandList.py @@ -0,0 +1,44 @@ +""" +Test 'breakpoint command list'. +""" + +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + +class TestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + @no_debug_info_test + def test_list_commands(self): + src_dir = self.getSourceDir() + yaml_path = os.path.join(src_dir, "a.yaml") + yaml_base, ext = os.path.splitext(yaml_path) + obj_path = self.getBuildArtifact("main.o") + self.yaml2obj(yaml_path, obj_path) + + # Create a target with the object file we just created from YAML + target = self.dbg.CreateTarget(obj_path) + self.assertTrue(target, VALID_TARGET) + + # Test without any breakpoints. + self.expect("breakpoint command list 1", error=True, substrs=["error: No breakpoints exist for which to list commands"]) + + # Set a breakpoint + self.runCmd("b foo") + + # Check list breakpoint commands for breakpoints that have no commands. + self.expect("breakpoint command list 1", startstr="Breakpoint 1 does not have an associated command.") + + # Add a breakpoint command. + self.runCmd("breakpoint command add -o 'source list' 1") + + # List breakpoint command that we just created. 
+ self.expect("breakpoint command list 1", startstr="""Breakpoint 1: + Breakpoint commands: + source list +""") + + # List breakpoint command with invalid breakpoint ID. + self.expect("breakpoint command list 2", error=True, startstr="error: '2' is not a currently valid breakpoint ID.") diff --git a/lldb/packages/Python/lldbsuite/test/commands/breakpoint/command/list/a.yaml b/lldb/packages/Python/lldbsuite/test/commands/breakpoint/command/list/a.yaml new file mode 100644 index 0000000000000..1007f60c19ee3 --- /dev/null +++ b/lldb/packages/Python/lldbsuite/test/commands/breakpoint/command/list/a.yaml @@ -0,0 +1,18 @@ +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + AddressAlign: 0x0000000000000010 + Content: 554889E5897DFC5DC3 +Symbols: + - Name: foo + Type: STT_FUNC + Section: .text + Size: 0x0000000000000009 +... From d9542db49e90457de62af3bfe395aaf4c47b68a5 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Mon, 2 Dec 2019 10:50:23 +0000 Subject: [PATCH 269/591] [UpdateTestChecks] Share the code to parse RUN: lines between all scripts Summary: This commit also introduces a common.debug() function to avoid many `if args.verbose:` statements. Depends on D70428. 
Reviewers: xbolva00, MaskRay, jdoerfert Reviewed By: MaskRay Subscribers: llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70432 --- llvm/utils/UpdateTestChecks/common.py | 31 ++++++++++++++++-- llvm/utils/update_analyze_test_checks.py | 28 +++-------------- llvm/utils/update_cc_test_checks.py | 36 ++++++--------------- llvm/utils/update_llc_test_checks.py | 40 +++++++----------------- llvm/utils/update_mca_test_checks.py | 30 ++---------------- llvm/utils/update_mir_test_checks.py | 19 +---------- llvm/utils/update_test_checks.py | 28 +++-------------- 7 files changed, 62 insertions(+), 150 deletions(-) diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index f064605109050..dfb3b16ae6bbb 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -14,12 +14,17 @@ class string: ##### Common utilities for update_*test_checks.py +_verbose = False + def parse_commandline_args(parser): parser.add_argument('-v', '--verbose', action='store_true', help='Show verbose output') parser.add_argument('-u', '--update-only', action='store_true', help='Only update test if it was already autogened') - return parser.parse_args() + args = parser.parse_args() + global _verbose + _verbose = args.verbose + return args def should_add_line_to_output(input_line, prefix_set): # Skip any blank comment lines in the IR. 
@@ -53,7 +58,7 @@ def invoke_tool(exe, cmd_args, ir): ##### LLVM IR parser -RUN_LINE_RE = re.compile(r'^\s*[;#]\s*RUN:\s*(.*)$') +RUN_LINE_RE = re.compile(r'^\s*(?://|[;#])\s*RUN:\s*(.*)$') CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)') PREFIX_RE = re.compile('^[a-zA-Z0-9_-]+$') CHECK_RE = re.compile(r'^\s*[;#]\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME)?:') @@ -91,6 +96,28 @@ def warn(msg, test_file=None): msg = '{}: {}'.format(msg, test_file) print('WARNING: {}'.format(msg), file=sys.stderr) +def debug(*args, **kwargs): + # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs): + if 'file' not in kwargs: + kwargs['file'] = sys.stderr + if _verbose: + print(*args, **kwargs) + +def find_run_lines(test, lines): + debug('Scanning for RUN lines in test file:', test) + raw_lines = [m.group(1) + for m in [RUN_LINE_RE.match(l) for l in lines] if m] + run_lines = [raw_lines[0]] if len(raw_lines) > 0 else [] + for l in raw_lines[1:]: + if run_lines[-1].endswith('\\'): + run_lines[-1] = run_lines[-1].rstrip('\\' + ' ' + l) + else: + run_lines.append(l) + debug('Found {} RUN lines in {}:'.format(len(run_lines), test)) + for l in run_lines: + debug(' RUN: {}'.format(l)) + return run_lines + def scrub_body(body): # Scrub runs of whitespace out of the assembly, but leave the leading # whitespace in place. 
diff --git a/llvm/utils/update_analyze_test_checks.py b/llvm/utils/update_analyze_test_checks.py index 37803656aa208..f3572e762549e 100755 --- a/llvm/utils/update_analyze_test_checks.py +++ b/llvm/utils/update_analyze_test_checks.py @@ -69,8 +69,6 @@ def main(): test_paths = [test for pattern in args.tests for test in glob.glob(pattern)] for test in test_paths: - if args.verbose: - print('Scanning for RUN lines in test file: %s' % (test,), file=sys.stderr) with open(test) as f: input_lines = [l.rstrip() for l in f] @@ -84,20 +82,7 @@ def main(): common.warn("Skipping test which isn't autogenerated: " + test) continue - raw_lines = [m.group(1) - for m in [common.RUN_LINE_RE.match(l) for l in input_lines] if m] - run_lines = [raw_lines[0]] if len(raw_lines) > 0 else [] - for l in raw_lines[1:]: - if run_lines[-1].endswith("\\"): - run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l - else: - run_lines.append(l) - - if args.verbose: - print('Found %d RUN lines:' % (len(run_lines),), file=sys.stderr) - for l in run_lines: - print(' RUN: ' + l, file=sys.stderr) - + run_lines = common.find_run_lines(test, input_lines) prefix_list = [] for l in run_lines: if '|' not in l: @@ -132,9 +117,8 @@ def main(): for prefix in prefixes: func_dict.update({prefix: dict()}) for prefixes, opt_args in prefix_list: - if args.verbose: - print('Extracted opt cmd: ' + opt_basename + ' ' + opt_args, file=sys.stderr) - print('Extracted FileCheck prefixes: ' + str(prefixes), file=sys.stderr) + common.debug('Extracted opt cmd:', opt_basename, opt_args, file=sys.stderr) + common.debug('Extracted FileCheck prefixes:', str(prefixes), file=sys.stderr) raw_tool_outputs = common.invoke_tool(args.opt_binary, opt_args, test) @@ -147,8 +131,7 @@ def main(): is_in_function = False is_in_function_start = False prefix_set = set([prefix for prefixes, _ in prefix_list for prefix in prefixes]) - if args.verbose: - print('Rewriting FileCheck prefixes: %s' % (prefix_set,), file=sys.stderr) + 
common.debug('Rewriting FileCheck prefixes:', str(prefix_set), file=sys.stderr) output_lines = [] output_lines.append(autogenerated_note) @@ -194,8 +177,7 @@ def main(): continue is_in_function = is_in_function_start = True - if args.verbose: - print('Writing %d lines to %s...' % (len(output_lines), test), file=sys.stderr) + common.debug('Writing %d lines to %s...' % (len(output_lines), test)) with open(test, 'wb') as f: f.writelines(['{}\n'.format(l).encode('utf-8') for l in output_lines]) diff --git a/llvm/utils/update_cc_test_checks.py b/llvm/utils/update_cc_test_checks.py index a3227c67743f4..414056fa6fdd8 100755 --- a/llvm/utils/update_cc_test_checks.py +++ b/llvm/utils/update_cc_test_checks.py @@ -29,7 +29,6 @@ ADVERT = '// NOTE: Assertions have been autogenerated by ' CHECK_RE = re.compile(r'^\s*//\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:') -RUN_LINE_RE = re.compile(r'^//\s*RUN:\s*(.*)$') SUBST = { '%clang': [], @@ -38,9 +37,6 @@ } def get_line2spell_and_mangled(args, clang_args): - def debug_mangled(*print_args, **kwargs): - if args.verbose: - print(*print_args, file=sys.stderr, **kwargs) ret = {} # Use clang's JSON AST dump to get the mangled name json_dump_args = [args.clang, *clang_args, '-fsyntax-only', '-o', '-'] @@ -49,7 +45,7 @@ def debug_mangled(*print_args, **kwargs): # -Xclang -ast-dump=json instead: json_dump_args.append('-Xclang') json_dump_args.append('-ast-dump=json') - debug_mangled('Running', ' '.join(json_dump_args)) + common.debug('Running', ' '.join(json_dump_args)) status = subprocess.run(json_dump_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) if status.returncode != 0: sys.stderr.write('Failed to run ' + ' '.join(json_dump_args) + '\n') @@ -67,20 +63,19 @@ def debug_mangled(*print_args, **kwargs): if node['kind'] != 'FunctionDecl': continue if node.get('isImplicit') is True and node.get('storageClass') == 'extern': - debug_mangled('Skipping builtin function:', node['name'], '@', node['loc']) + common.debug('Skipping builtin 
function:', node['name'], '@', node['loc']) continue - debug_mangled('Found function:', node['kind'], node['name'], '@', node['loc']) + common.debug('Found function:', node['kind'], node['name'], '@', node['loc']) line = node['loc'].get('line') # If there is no line it is probably a builtin function -> skip if line is None: - debug_mangled('Skipping function without line number:', node['name'], '@', node['loc']) + common.debug('Skipping function without line number:', node['name'], '@', node['loc']) continue spell = node['name'] mangled = node.get('mangledName', spell) ret[int(line)-1] = (spell, mangled) - if args.verbose: - for line, func_name in sorted(ret.items()): - print('line {}: found function {}'.format(line+1, func_name), file=sys.stderr) + for line, func_name in sorted(ret.items()): + common.debug('line {}: found function {}'.format(line+1, func_name), file=sys.stderr) if not ret: common.warn('Did not find any functions using', ' '.join(json_dump_args)) return ret @@ -191,19 +186,7 @@ def main(): continue # Extract RUN lines. - raw_lines = [m.group(1) - for m in [RUN_LINE_RE.match(l) for l in input_lines] if m] - run_lines = [raw_lines[0]] if len(raw_lines) > 0 else [] - for l in raw_lines[1:]: - if run_lines[-1].endswith("\\"): - run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l - else: - run_lines.append(l) - - if args.verbose: - print('Found {} RUN lines:'.format(len(run_lines)), file=sys.stderr) - for l in run_lines: - print(' RUN: ' + l, file=sys.stderr) + run_lines = common.find_run_lines(filename, input_lines) # Build a list of clang command lines and check prefixes from RUN lines. 
run_list = [] @@ -260,9 +243,8 @@ def main(): for prefix in prefixes: func_dict.update({prefix: dict()}) for prefixes, clang_args, extra_commands, triple_in_cmd in run_list: - if args.verbose: - print('Extracted clang cmd: clang {}'.format(clang_args), file=sys.stderr) - print('Extracted FileCheck prefixes: {}'.format(prefixes), file=sys.stderr) + common.debug('Extracted clang cmd: clang {}'.format(clang_args)) + common.debug('Extracted FileCheck prefixes: {}'.format(prefixes)) get_function_body(args, filename, clang_args, extra_commands, prefixes, triple_in_cmd, func_dict) diff --git a/llvm/utils/update_llc_test_checks.py b/llvm/utils/update_llc_test_checks.py index 1168eec9a33cf..750650f8640c9 100755 --- a/llvm/utils/update_llc_test_checks.py +++ b/llvm/utils/update_llc_test_checks.py @@ -43,8 +43,6 @@ def main(): test_paths = [test for pattern in args.tests for test in glob.glob(pattern)] for test in test_paths: - if args.verbose: - print('Scanning for RUN lines in test file: %s' % (test,), file=sys.stderr) with open(test) as f: input_lines = [l.rstrip() for l in f] @@ -65,20 +63,7 @@ def main(): triple_in_ir = m.groups()[0] break - raw_lines = [m.group(1) - for m in [common.RUN_LINE_RE.match(l) for l in input_lines] if m] - run_lines = [raw_lines[0]] if len(raw_lines) > 0 else [] - for l in raw_lines[1:]: - if run_lines[-1].endswith("\\"): - run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l - else: - run_lines.append(l) - - if args.verbose: - print('Found %d RUN lines:' % (len(run_lines),), file=sys.stderr) - for l in run_lines: - print(' RUN: ' + l, file=sys.stderr) - + run_lines = common.find_run_lines(test, input_lines) run_list = [] for l in run_lines: if '|' not in l: @@ -115,12 +100,6 @@ def main(): llc_cmd_args = llc_cmd_args.replace('< %s', '').replace('%s', '').strip() if test.endswith('.mir'): llc_cmd_args += ' -x mir' - comment_sym = '#' - check_indent = ' ' - else: - comment_sym = ';' - check_indent = '' - check_prefixes = [item for m in 
common.CHECK_PREFIX_RE.finditer(filecheck_cmd) for item in m.group(1).split(',')] if not check_prefixes: @@ -130,6 +109,12 @@ def main(): # now, we just ignore all but the last. run_list.append((check_prefixes, llc_cmd_args, triple_in_cmd, march_in_cmd)) + if test.endswith('.mir'): + comment_sym = '#' + check_indent = ' ' + else: + comment_sym = ';' + check_indent = '' autogenerated_note = (comment_sym + ADVERT + 'utils/' + script_name) func_dict = {} @@ -138,9 +123,8 @@ def main(): for prefix in prefixes: func_dict.update({prefix: dict()}) for prefixes, llc_args, triple_in_cmd, march_in_cmd in run_list: - if args.verbose: - print('Extracted LLC cmd: ' + llc_tool + ' ' + llc_args, file=sys.stderr) - print('Extracted FileCheck prefixes: ' + str(prefixes), file=sys.stderr) + common.debug('Extracted LLC cmd:', llc_tool, llc_args) + common.debug('Extracted FileCheck prefixes:', str(prefixes)) raw_tool_output = common.invoke_tool(args.llc_binary, llc_args, test) triple = triple_in_cmd or triple_in_ir @@ -154,8 +138,7 @@ def main(): is_in_function_start = False func_name = None prefix_set = set([prefix for p in run_list for prefix in p[0]]) - if args.verbose: - print('Rewriting FileCheck prefixes: %s' % (prefix_set,), file=sys.stderr) + common.debug('Rewriting FileCheck prefixes:', str(prefix_set)) output_lines = [] output_lines.append(autogenerated_note) @@ -199,8 +182,7 @@ def main(): continue is_in_function = is_in_function_start = True - if args.verbose: - print('Writing %d lines to %s...' % (len(output_lines), test), file=sys.stderr) + common.debug('Writing %d lines to %s...' 
% (len(output_lines), test)) with open(test, 'wb') as f: f.writelines(['{}\n'.format(l).encode('utf-8') for l in output_lines]) diff --git a/llvm/utils/update_mca_test_checks.py b/llvm/utils/update_mca_test_checks.py index ba0a99392e02f..0522c80be4d5f 100755 --- a/llvm/utils/update_mca_test_checks.py +++ b/llvm/utils/update_mca_test_checks.py @@ -83,26 +83,6 @@ def _parse_args(): return args -def _find_run_lines(input_lines, args): - raw_lines = [m.group(1) - for m in [common.RUN_LINE_RE.match(l) for l in input_lines] - if m] - run_lines = [raw_lines[0]] if len(raw_lines) > 0 else [] - for l in raw_lines[1:]: - if run_lines[-1].endswith(r'\\'): - run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + l - else: - run_lines.append(l) - - if args.verbose: - sys.stderr.write('Found {} RUN line{}:\n'.format( - len(run_lines), '' if len(run_lines) == 1 else 's')) - for line in run_lines: - sys.stderr.write(' RUN: {}\n'.format(line)) - - return run_lines - - def _get_run_infos(run_lines, args): run_infos = [] for run_line in run_lines: @@ -544,9 +524,7 @@ def _write_output(test_path, input_lines, prefix_list, block_infos, # noqa return sys.stderr.write(' [{} lines total]\n'.format(len(output_lines))) - if args.verbose: - sys.stderr.write( - 'Writing {} lines to {}...\n\n'.format(len(output_lines), test_path)) + common.debug('Writing', len(output_lines), 'lines to', test_path, '..\n\n') with open(test_path, 'wb') as f: f.writelines(['{}\n'.format(l).encode('utf-8') for l in output_lines]) @@ -562,17 +540,13 @@ def main(): # will be written once per source location per test. 
_configure_warnings(args) - if args.verbose: - sys.stderr.write( - 'Scanning for RUN lines in test file: {}\n'.format(test_path)) - if not os.path.isfile(test_path): raise Error('could not find test file: {}'.format(test_path)) with open(test_path) as f: input_lines = [l.rstrip() for l in f] - run_lines = _find_run_lines(input_lines, args) + run_lines = common.find_run_lines(test_path, input_lines) run_infos = _get_run_infos(run_lines, args) common_prefix, prefix_pad = _get_useful_prefix_info(run_infos) block_infos = _get_block_infos(run_infos, test_path, args, common_prefix) diff --git a/llvm/utils/update_mir_test_checks.py b/llvm/utils/update_mir_test_checks.py index 6e90613095850..46f497007fc90 100755 --- a/llvm/utils/update_mir_test_checks.py +++ b/llvm/utils/update_mir_test_checks.py @@ -96,22 +96,6 @@ def find_triple_in_ir(lines, verbose=False): return None -def find_run_lines(test, lines, verbose=False): - raw_lines = [m.group(1) - for m in [common.RUN_LINE_RE.match(l) for l in lines] if m] - run_lines = [raw_lines[0]] if len(raw_lines) > 0 else [] - for l in raw_lines[1:]: - if run_lines[-1].endswith("\\"): - run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l - else: - run_lines.append(l) - if verbose: - log('Found {} RUN lines:'.format(len(run_lines))) - for l in run_lines: - log(' RUN: {}'.format(l)) - return run_lines - - def build_run_list(test, run_lines, verbose=False): run_list = [] all_prefixes = [] @@ -296,7 +280,6 @@ def should_add_line_to_output(input_line, prefix_set): def update_test_file(args, test): - log('Scanning for RUN lines in test file: {}'.format(test), args.verbose) with open(test) as fd: input_lines = [l.rstrip() for l in fd] @@ -313,7 +296,7 @@ def update_test_file(args, test): return triple_in_ir = find_triple_in_ir(input_lines, args.verbose) - run_lines = find_run_lines(test, input_lines, args.verbose) + run_lines = common.find_run_lines(test, input_lines) run_list, common_prefixes = build_run_list(test, run_lines, args.verbose) 
simple_functions = find_functions_with_one_bb(input_lines, args.verbose) diff --git a/llvm/utils/update_test_checks.py b/llvm/utils/update_test_checks.py index 31122b2f7b755..3fd8dd7dd7e93 100755 --- a/llvm/utils/update_test_checks.py +++ b/llvm/utils/update_test_checks.py @@ -84,8 +84,6 @@ def main(): # On Windows we must expand the patterns ourselves. test_paths = [test for pattern in args.tests for test in glob.glob(pattern)] for test in test_paths: - if args.verbose: - print('Scanning for RUN lines in test file: ' + test, file=sys.stderr) with open(test) as f: input_lines = [l.rstrip() for l in f] @@ -99,20 +97,7 @@ def main(): common.warn("Skipping test which isn't autogenerated: " + test) continue - raw_lines = [m.group(1) - for m in [common.RUN_LINE_RE.match(l) for l in input_lines] if m] - run_lines = [raw_lines[0]] if len(raw_lines) > 0 else [] - for l in raw_lines[1:]: - if run_lines[-1].endswith('\\'): - run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + l - else: - run_lines.append(l) - - if args.verbose: - print('Found %d RUN lines:' % (len(run_lines),), file=sys.stderr) - for l in run_lines: - print(' RUN: ' + l, file=sys.stderr) - + run_lines = common.find_run_lines(test, input_lines) prefix_list = [] for l in run_lines: if '|' not in l: @@ -146,9 +131,8 @@ def main(): for prefix in prefixes: func_dict.update({prefix: dict()}) for prefixes, opt_args in prefix_list: - if args.verbose: - print('Extracted opt cmd: ' + opt_basename + ' ' + opt_args, file=sys.stderr) - print('Extracted FileCheck prefixes: ' + str(prefixes), file=sys.stderr) + common.debug('Extracted opt cmd: ' + opt_basename + ' ' + opt_args) + common.debug('Extracted FileCheck prefixes: ' + str(prefixes)) raw_tool_output = common.invoke_tool(args.opt_binary, opt_args, test) common.build_function_body_dictionary( @@ -159,8 +143,7 @@ def main(): is_in_function = False is_in_function_start = False prefix_set = set([prefix for prefixes, _ in prefix_list for prefix in prefixes]) - if 
args.verbose: - print('Rewriting FileCheck prefixes: %s' % (prefix_set,), file=sys.stderr) + common.debug('Rewriting FileCheck prefixes:', str(prefix_set)) output_lines = [] output_lines.append(autogenerated_note) @@ -207,8 +190,7 @@ def main(): continue is_in_function = is_in_function_start = True - if args.verbose: - print('Writing %d lines to %s...' % (len(output_lines), test), file=sys.stderr) + common.debug('Writing %d lines to %s...' % (len(output_lines), test)) with open(test, 'wb') as f: f.writelines(['{}\n'.format(l).encode('utf-8') for l in output_lines]) From 8ab3b4defd90a10db042ec80e61160a32e5403c0 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Mon, 2 Dec 2019 10:53:57 +0000 Subject: [PATCH 270/591] [update_cc_test_checks.py] Handle extern "C" and namespaces Summary: My change to use the clang AST JSON dump did not handle functions declared inside scopes other than the root TranslationUnitDecl. After this change update_cc_test_checks.py also works for C++ test cases that use extern "C" and namespaces. Reviewers: MaskRay Reviewed By: MaskRay Subscribers: llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70389 --- llvm/utils/update_cc_test_checks.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/llvm/utils/update_cc_test_checks.py b/llvm/utils/update_cc_test_checks.py index 414056fa6fdd8..952f48e5941fc 100755 --- a/llvm/utils/update_cc_test_checks.py +++ b/llvm/utils/update_cc_test_checks.py @@ -52,28 +52,37 @@ def get_line2spell_and_mangled(args, clang_args): sys.stderr.write(status.stderr.decode()) sys.stderr.write(status.stdout.decode()) sys.exit(2) - ast = json.loads(status.stdout.decode()) - if ast['kind'] != 'TranslationUnitDecl': - common.error('Clang AST dump JSON format changed?') - sys.exit(2) - # Get the inner node and iterate over all children of type FunctionDecl. + # Parse the clang JSON and add all children of type FunctionDecl. 
# TODO: Should we add checks for global variables being emitted? - for node in ast['inner']: + def parse_clang_ast_json(node): + node_kind = node['kind'] + # Recurse for the following nodes that can contain nested function decls: + if node_kind in ('NamespaceDecl', 'LinkageSpecDecl', 'TranslationUnitDecl'): + for inner in node['inner']: + parse_clang_ast_json(inner) + # Otherwise we ignore everything except functions: if node['kind'] != 'FunctionDecl': - continue + return if node.get('isImplicit') is True and node.get('storageClass') == 'extern': common.debug('Skipping builtin function:', node['name'], '@', node['loc']) - continue + return common.debug('Found function:', node['kind'], node['name'], '@', node['loc']) line = node['loc'].get('line') # If there is no line it is probably a builtin function -> skip if line is None: common.debug('Skipping function without line number:', node['name'], '@', node['loc']) - continue + return spell = node['name'] mangled = node.get('mangledName', spell) ret[int(line)-1] = (spell, mangled) + + ast = json.loads(status.stdout.decode()) + if ast['kind'] != 'TranslationUnitDecl': + common.error('Clang AST dump JSON format changed?') + sys.exit(2) + parse_clang_ast_json(ast) + for line, func_name in sorted(ret.items()): common.debug('line {}: found function {}'.format(line+1, func_name), file=sys.stderr) if not ret: From 510792a2e0e3792871baa00ed34e162bba7cd9a2 Mon Sep 17 00:00:00 2001 From: Mark Murray Date: Thu, 28 Nov 2019 16:38:01 +0000 Subject: [PATCH 271/591] [ARM][MVE][Intrinsics] Add VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics. Summary: Add VMINQ/VMAXQ/VMINNMQ/VMAXNMQ intrinsics and their predicated versions. Add unit tests. 
Subscribers: kristof.beyls, hiraditya, dmgreen, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D70829 --- clang/include/clang/Basic/arm_mve.td | 42 ++++++++ clang/include/clang/Basic/arm_mve_defs.td | 1 + .../test/CodeGen/arm-mve-intrinsics/vmaxnmq.c | 65 ++++++++++++ clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c | 98 +++++++++++++++++++ .../test/CodeGen/arm-mve-intrinsics/vminnmq.c | 65 ++++++++++++ clang/test/CodeGen/arm-mve-intrinsics/vminq.c | 98 +++++++++++++++++++ llvm/include/llvm/IR/IntrinsicsARM.td | 6 ++ llvm/lib/Target/ARM/ARMInstrMVE.td | 93 +++++++++++------- .../CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll | 61 ++++++++++++ .../CodeGen/Thumb2/mve-intrinsics/vmaxq.ll | 89 +++++++++++++++++ .../CodeGen/Thumb2/mve-intrinsics/vminnmq.ll | 62 ++++++++++++ .../CodeGen/Thumb2/mve-intrinsics/vminq.ll | 89 +++++++++++++++++ 12 files changed, 731 insertions(+), 38 deletions(-) create mode 100644 clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c create mode 100644 clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c create mode 100644 clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c create mode 100644 clang/test/CodeGen/arm-mve-intrinsics/vminq.c create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll create mode 100644 llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index dfd8097f0644f..90cccb12472c8 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -105,6 +105,26 @@ defm vornq_m: predicated_bit_op_fp<"orn_predicated">; defm vorrq_m: predicated_bit_op_fp<"orr_predicated">; } +// Predicated intrinsics - Int types only +let params = T.Int in { +def vminq_m: Intrinsic< + Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred), + 
(IRInt<"min_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>; +def vmaxq_m: Intrinsic< + Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred), + (IRInt<"max_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>; +} + +// Predicated intrinsics - Float types only +let params = T.Float in { +def vminnmq_m: Intrinsic< + Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred), + (IRInt<"min_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>; +def vmaxnmq_m: Intrinsic< + Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred), + (IRInt<"max_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>; +} + let params = T.Int in { def vminvq: Intrinsic $prev, $vec))>; @@ -173,6 +193,28 @@ let params = T.Float in { defm: compare<"le", fcmp_le>; } +let params = T.Signed in { + def vminq: Intrinsic; + def vmaxq: Intrinsic; +} +let params = T.Unsigned in { + def vminqu: Intrinsic, + NameOverride<"vminq">; + def vmaxqu: Intrinsic, + NameOverride<"vmaxq">; +} +let params = T.Float in { + def vminnmq: Intrinsic $a, $b)>; + def vmaxnmq: Intrinsic $a, $b)>; +} + + multiclass contiguous_load same_size, list wider> { // Intrinsics named with explicit memory and element sizes that match: diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index c0ed80d456a5f..d837a1d33d000 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -107,6 +107,7 @@ def fcmp_ge: IRBuilder<"CreateFCmpOGE">; def fcmp_lt: IRBuilder<"CreateFCmpOLT">; def fcmp_le: IRBuilder<"CreateFCmpOLE">; def splat: CGHelperFn<"ARMMVEVectorSplat">; +def select: IRBuilder<"CreateSelect">; // A node that makes an Address out of a pointer-typed Value, by // providing an alignment as the second argument. 
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c new file mode 100644 index 0000000000000..63300466c819e --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmq.c @@ -0,0 +1,65 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_vmaxnmq_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP0]] +// +float16x8_t test_vmaxnmq_f16(float16x8_t a, float16x8_t b) +{ +#ifdef POLYMORPHIC + return vmaxnmq(a, b); +#else /* POLYMORPHIC */ + return vmaxnmq_f16(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmaxnmq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP0]] +// +float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) +{ +#ifdef POLYMORPHIC + return vmaxnmq(a, b); +#else /* POLYMORPHIC */ + return vmaxnmq_f32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmaxnmq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> 
[[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vmaxnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmaxnmq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vmaxnmq_m_f16(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmaxnmq_m_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vmaxnmq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmaxnmq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vmaxnmq_m_f32(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c new file mode 100644 index 0000000000000..133e28d6cf047 --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxq.c @@ -0,0 +1,98 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_vmaxq_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = icmp slt <16 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> 
[[B]], <16 x i8> [[A]] +// CHECK-NEXT: ret <16 x i8> [[TMP1]] +// +int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) +{ +#ifdef POLYMORPHIC + return vmaxq(a, b); +#else /* POLYMORPHIC */ + return vmaxq_s8(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmaxq_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = icmp ult <8 x i16> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[B]], <8 x i16> [[A]] +// CHECK-NEXT: ret <8 x i16> [[TMP1]] +// +uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) +{ +#ifdef POLYMORPHIC + return vmaxq(a, b); +#else /* POLYMORPHIC */ + return vmaxq_u16(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmaxq_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[B]], <4 x i32> [[A]] +// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// +int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) +{ +#ifdef POLYMORPHIC + return vmaxq(a, b); +#else /* POLYMORPHIC */ + return vmaxq_s32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmaxq_m_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.max.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vmaxq_m_u8(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmaxq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vmaxq_m_u8(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmaxq_m_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.max.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vmaxq_m_s16(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmaxq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vmaxq_m_s16(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmaxq_m_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vmaxq_m_u32(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmaxq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vmaxq_m_u32(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c new file mode 100644 index 0000000000000..9ed5bf0c859be --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vminnmq.c @@ -0,0 +1,65 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: 
@test_vminnmq_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x half> @llvm.minnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP0]] +// +float16x8_t test_vminnmq_f16(float16x8_t a, float16x8_t b) +{ +#ifdef POLYMORPHIC + return vminnmq(a, b); +#else /* POLYMORPHIC */ + return vminnmq_f16(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vminnmq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.minnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP0]] +// +float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) +{ +#ifdef POLYMORPHIC + return vminnmq(a, b); +#else /* POLYMORPHIC */ + return vminnmq_f32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vminnmq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vminnmq_m_f16(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vminnmq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vminnmq_m_f16(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vminnmq_m_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vminnmq_m_f32(float32x4_t 
inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vminnmq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vminnmq_m_f32(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminq.c new file mode 100644 index 0000000000000..9e54eaeb5d839 --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vminq.c @@ -0,0 +1,98 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_vminq_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <16 x i8> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[B]], <16 x i8> [[A]] +// CHECK-NEXT: ret <16 x i8> [[TMP1]] +// +uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) +{ +#ifdef POLYMORPHIC + return vminq(a, b); +#else /* POLYMORPHIC */ + return vminq_u8(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vminq_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <8 x i16> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[B]], <8 x i16> [[A]] +// CHECK-NEXT: ret <8 x i16> [[TMP1]] +// +int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) +{ +#ifdef POLYMORPHIC + return vminq(a, b); +#else /* POLYMORPHIC */ + return vminq_s16(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vminq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <4 x i32> [[A:%.*]], 
[[B:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[B]], <4 x i32> [[A]] +// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// +uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) +{ +#ifdef POLYMORPHIC + return vminq(a, b); +#else /* POLYMORPHIC */ + return vminq_u32(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vminq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vminq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vminq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vminq_m_s8(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vminq_m_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vminq_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vminq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vminq_m_u16(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vminq_m_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> 
[[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vminq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vminq_m(inactive, a, b, p); +#else /* POLYMORPHIC */ + return vminq_m_s32(inactive, a, b, p); +#endif /* POLYMORPHIC */ +} diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index bd61bf13c54d2..4d4bc5e2d82d0 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -796,6 +796,12 @@ multiclass IntrinsicSignSuffix rets, list params = [], def _u: Intrinsic; } +def int_arm_mve_min_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>; +def int_arm_mve_max_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>; def int_arm_mve_abd_predicated: Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index cc3a8ee77d741..e64ab9b73700c 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1015,6 +1015,16 @@ let Predicates = [HasMVEFloat] in { (v4f32 (MVE_VMAXNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>; def : Pat<(v8f16 (fmaxnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))), (v8f16 (MVE_VMAXNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>; + def : Pat<(v4f32 (int_arm_mve_max_predicated (v4f32 MQPR:$val1), (v4f32 MQPR:$val2), + (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive))), + (v4f32 (MVE_VMAXNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2), + (i32 1), (v4i1 VCCR:$mask), + (v4f32 MQPR:$inactive)))>; + def : Pat<(v8f16 (int_arm_mve_max_predicated (v8f16 MQPR:$val1), (v8f16 MQPR:$val2), + (v8i1 VCCR:$mask), (v8f16 
MQPR:$inactive))), + (v8f16 (MVE_VMAXNMf32 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2), + (i32 1), (v8i1 VCCR:$mask), + (v8f16 MQPR:$inactive)))>; } def MVE_VMINNMf32 : MVE_VMINMAXNM<"vminnm", "f32", 0b0, 0b1>; @@ -1025,6 +1035,16 @@ let Predicates = [HasMVEFloat] in { (v4f32 (MVE_VMINNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>; def : Pat<(v8f16 (fminnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))), (v8f16 (MVE_VMINNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>; + def : Pat<(v4f32 (int_arm_mve_min_predicated (v4f32 MQPR:$val1), (v4f32 MQPR:$val2), + (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive))), + (v4f32 (MVE_VMINNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2), + (i32 1), (v4i1 VCCR:$mask), + (v4f32 MQPR:$inactive)))>; + def : Pat<(v8f16 (int_arm_mve_min_predicated (v8f16 MQPR:$val1), (v8f16 MQPR:$val2), + (v8i1 VCCR:$mask), (v8f16 MQPR:$inactive))), + (v8f16 (MVE_VMINNMf32 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2), + (i32 1), (v8i1 VCCR:$mask), + (v8f16 MQPR:$inactive)))>; } @@ -1042,48 +1062,45 @@ class MVE_VMINMAX size, let Inst{4} = bit_4; } -multiclass MVE_VMINMAX_all_sizes { - def s8 : MVE_VMINMAX; - def s16 : MVE_VMINMAX; - def s32 : MVE_VMINMAX; - def u8 : MVE_VMINMAX; - def u16 : MVE_VMINMAX; - def u32 : MVE_VMINMAX; -} +multiclass MVE_VMINMAX_m { + def "" : MVE_VMINMAX; -defm MVE_VMAX : MVE_VMINMAX_all_sizes<"vmax", 0b0>; -defm MVE_VMIN : MVE_VMINMAX_all_sizes<"vmin", 0b1>; + let Predicates = [HasMVEInt] in { + // Unpredicated min/max + def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn))), + (VTI.Vec (!cast(NAME) + (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; -let Predicates = [HasMVEInt] in { - def : Pat<(v16i8 (smin (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), - (v16i8 (MVE_VMINs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; - def : Pat<(v8i16 (smin (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), - (v8i16 (MVE_VMINs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; - def : Pat<(v4i32 (smin (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), - (v4i32 (MVE_VMINs32 (v4i32 
MQPR:$val1), (v4i32 MQPR:$val2)))>; - - def : Pat<(v16i8 (smax (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), - (v16i8 (MVE_VMAXs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; - def : Pat<(v8i16 (smax (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), - (v8i16 (MVE_VMAXs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; - def : Pat<(v4i32 (smax (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), - (v4i32 (MVE_VMAXs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; - - def : Pat<(v16i8 (umin (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), - (v16i8 (MVE_VMINu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; - def : Pat<(v8i16 (umin (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), - (v8i16 (MVE_VMINu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; - def : Pat<(v4i32 (umin (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), - (v4i32 (MVE_VMINu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; - - def : Pat<(v16i8 (umax (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), - (v16i8 (MVE_VMAXu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; - def : Pat<(v8i16 (umax (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), - (v8i16 (MVE_VMAXu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; - def : Pat<(v4i32 (umax (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), - (v4i32 (MVE_VMAXu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; + // Predicated min/max + def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))), + (VTI.Vec (!cast(NAME) + (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), + (i32 1), (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive)))>; + } } +multiclass MVE_VMAX + : MVE_VMINMAX_m<"vmax", 0b0, VTI, !if(VTI.Unsigned, umax, smax), int_arm_mve_max_predicated>; +multiclass MVE_VMIN + : MVE_VMINMAX_m<"vmin", 0b1, VTI, !if(VTI.Unsigned, umin, smin), int_arm_mve_min_predicated>; + +defm MVE_VMINs8 : MVE_VMIN; +defm MVE_VMINs16 : MVE_VMIN; +defm MVE_VMINs32 : MVE_VMIN; +defm MVE_VMINu8 : MVE_VMIN; +defm MVE_VMINu16 : MVE_VMIN; +defm MVE_VMINu32 : MVE_VMIN; + +defm MVE_VMAXs8 : MVE_VMAX; +defm MVE_VMAXs16 : MVE_VMAX; +defm 
MVE_VMAXs32 : MVE_VMAX; +defm MVE_VMAXu8 : MVE_VMAX; +defm MVE_VMAXu16 : MVE_VMAX; +defm MVE_VMAXu32 : MVE_VMAX; + // end of mve_comp instructions // start of mve_bit instructions diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll new file mode 100644 index 0000000000000..d89308bb59412 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmq.ll @@ -0,0 +1,61 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define dso_local arm_aapcs_vfpcc <8 x half> @test_vmaxnmq_f16(<8 x half> %a, <8 x half> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vmaxnmq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnm.f16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.maxnum.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %0 +} + +declare <8 x half> @llvm.maxnum.v8f16(<8 x half>, <8 x half>) #1 + +define dso_local arm_aapcs_vfpcc <4 x float> @test_vmaxnmq_f32(<4 x float> %a, <4 x float> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vmaxnmq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmaxnm.f32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %0 +} + +declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #1 + +define dso_local arm_aapcs_vfpcc <8 x half> @test_vmaxnmq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) local_unnamed_addr #0 { +; CHECK-LABEL: test_vmaxnmq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmaxnmt.f32 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half> %a, <8 x half> %b, <8 x i1> %1, <8 x 
half> %inactive) + ret <8 x half> %2 +} +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2 + +declare <8 x half> @llvm.arm.mve.max.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>, <8 x half>) #2 + +define dso_local arm_aapcs_vfpcc <4 x float> @test_vmaxnmq_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #0 { +; CHECK-LABEL: test_vmaxnmq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmaxnmt.f32 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float> %a, <4 x float> %b, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2 + +declare <4 x float> @llvm.arm.mve.max.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) #2 diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxq.ll new file mode 100644 index 0000000000000..09a7d60cd1650 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxq.ll @@ -0,0 +1,89 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define dso_local arm_aapcs_vfpcc <16 x i8> @test_vmaxq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vmaxq_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmin.u8 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = icmp ugt <16 x i8> %a, %b + %1 = select <16 x i1> %0, <16 x i8> %b, <16 x i8> %a + ret <16 x i8> %1 +} + +define dso_local arm_aapcs_vfpcc <8 x i16> @test_vmaxq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vmaxq_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmin.s16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = icmp sgt <8 
x i16> %a, %b + %1 = select <8 x i1> %0, <8 x i16> %b, <8 x i16> %a + ret <8 x i16> %1 +} + +define dso_local arm_aapcs_vfpcc <4 x i32> @test_vmaxq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vmaxq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmin.u32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = icmp ugt <4 x i32> %a, %b + %1 = select <4 x i1> %0, <4 x i32> %b, <4 x i32> %a + ret <4 x i32> %1 +} + +define dso_local arm_aapcs_vfpcc <16 x i8> @test_vmaxq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vmaxq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmint.s8 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2 + +declare <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2 + +define dso_local arm_aapcs_vfpcc <8 x i16> @test_vmaxq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vmaxq_m_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmint.s16 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2 + +declare <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2 + +define dso_local arm_aapcs_vfpcc <4 x i32> @test_vmaxq_m_s32(<4 x i32> %inactive, <4 x i32> %a, 
<4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vmaxq_m_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmint.s32 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2 + +declare <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2 diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll new file mode 100644 index 0000000000000..10cd674d39a8f --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmq.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define dso_local arm_aapcs_vfpcc <8 x half> @test_vminnmq_f16(<8 x half> %a, <8 x half> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vminnmq_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vminnm.f16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <8 x half> @llvm.minnum.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %0 +} + +declare <8 x half> @llvm.minnum.v8f16(<8 x half>, <8 x half>) #1 + +define dso_local arm_aapcs_vfpcc <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x float> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vminnmq_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vminnm.f32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = tail call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %0 +} + +declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #1 + +define dso_local arm_aapcs_vfpcc <8 x half> @test_vminnmq_m_f16(<8 x half> %inactive, <8 
x half> %a, <8 x half> %b, i16 zeroext %p) local_unnamed_addr #0 { +; CHECK-LABEL: test_vminnmq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vminnmt.f32 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half> %a, <8 x half> %b, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2 + +declare <8 x half> @llvm.arm.mve.min.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x i1>, <8 x half>) #2 + +define dso_local arm_aapcs_vfpcc <4 x float> @test_vminnmq_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #0 { +; CHECK-LABEL: test_vminnmq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vminnmt.f32 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float> %a, <4 x float> %b, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2 + +declare <4 x float> @llvm.arm.mve.min.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) #2 diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll new file mode 100644 index 0000000000000..0cbef86c928f7 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminq.ll @@ -0,0 +1,89 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define dso_local arm_aapcs_vfpcc <16 x i8> @test_vminq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vminq_u8: +; 
CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmin.u8 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = icmp ugt <16 x i8> %a, %b + %1 = select <16 x i1> %0, <16 x i8> %b, <16 x i8> %a + ret <16 x i8> %1 +} + +define dso_local arm_aapcs_vfpcc <8 x i16> @test_vminq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vminq_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmin.s16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = icmp sgt <8 x i16> %a, %b + %1 = select <8 x i1> %0, <8 x i16> %b, <8 x i16> %a + ret <8 x i16> %1 +} + +define dso_local arm_aapcs_vfpcc <4 x i32> @test_vminq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 { +; CHECK-LABEL: test_vminq_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmin.u32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = icmp ugt <4 x i32> %a, %b + %1 = select <4 x i1> %0, <4 x i32> %b, <4 x i32> %a + ret <4 x i32> %1 +} + +define dso_local arm_aapcs_vfpcc <16 x i8> @test_vminq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vminq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmint.s8 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2 + +declare <16 x i8> @llvm.arm.mve.min.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2 + +define dso_local arm_aapcs_vfpcc <8 x i16> @test_vminq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vminq_m_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmint.s16 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 
x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2 + +declare <8 x i16> @llvm.arm.mve.min.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2 + +define dso_local arm_aapcs_vfpcc <4 x i32> @test_vminq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 { +; CHECK-LABEL: test_vminq_m_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmint.s32 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2 + +declare <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2 From f8fb3729e9d794f174aa737351235f76e6ac46db Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Mon, 2 Dec 2019 12:44:55 +0100 Subject: [PATCH 272/591] [lldb][NFC] Make Stream's IndentLevel an unsigned integers. We expect it to be always positive values and LLVM/Clang's IndentLevel values are already unsigned integers, so we should do the same. --- lldb/include/lldb/Utility/Stream.h | 12 ++++++------ lldb/source/Target/Target.cpp | 2 +- lldb/source/Utility/Stream.cpp | 10 ++++++---- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/lldb/include/lldb/Utility/Stream.h b/lldb/include/lldb/Utility/Stream.h index 88cdb88d77adf..9982d8236a652 100644 --- a/lldb/include/lldb/Utility/Stream.h +++ b/lldb/include/lldb/Utility/Stream.h @@ -338,8 +338,8 @@ class Stream { /// Get the current indentation level. /// /// \return - /// The current indentation level as an integer. 
- int GetIndentLevel() const; + /// The current indentation level. + unsigned GetIndentLevel() const; /// Indent the current line in the stream. /// @@ -353,10 +353,10 @@ class Stream { size_t Indent(llvm::StringRef s); /// Decrement the current indentation level. - void IndentLess(int amount = 2); + void IndentLess(unsigned amount = 2); /// Increment the current indentation level. - void IndentMore(int amount = 2); + void IndentMore(unsigned amount = 2); /// Output an offset value. /// @@ -411,7 +411,7 @@ class Stream { /// /// \param[in] level /// The new indentation level. - void SetIndentLevel(int level); + void SetIndentLevel(unsigned level); /// Output a SLEB128 number to the stream. /// @@ -442,7 +442,7 @@ class Stream { uint32_t m_addr_size; ///< Size of an address in bytes. lldb::ByteOrder m_byte_order; ///< Byte order to use when encoding scalar types. - int m_indent_level; ///< Indention level. + unsigned m_indent_level; ///< Indention level. std::size_t m_bytes_written = 0; ///< Number of bytes written so far. 
void _PutHex8(uint8_t uvalue, bool add_prefix); diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index 4b9a1b77ad16d..aeb77b7cf676a 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -3177,7 +3177,7 @@ void Target::StopHook::SetThreadSpecifier(ThreadSpec *specifier) { void Target::StopHook::GetDescription(Stream *s, lldb::DescriptionLevel level) const { - int indent_level = s->GetIndentLevel(); + unsigned indent_level = s->GetIndentLevel(); s->SetIndentLevel(indent_level + 2); diff --git a/lldb/source/Utility/Stream.cpp b/lldb/source/Utility/Stream.cpp index 991f7e924d8dd..119d1e0f79643 100644 --- a/lldb/source/Utility/Stream.cpp +++ b/lldb/source/Utility/Stream.cpp @@ -185,16 +185,18 @@ Stream &Stream::operator<<(int64_t sval) { } // Get the current indentation level -int Stream::GetIndentLevel() const { return m_indent_level; } +unsigned Stream::GetIndentLevel() const { return m_indent_level; } // Set the current indentation level -void Stream::SetIndentLevel(int indent_level) { m_indent_level = indent_level; } +void Stream::SetIndentLevel(unsigned indent_level) { + m_indent_level = indent_level; +} // Increment the current indentation level -void Stream::IndentMore(int amount) { m_indent_level += amount; } +void Stream::IndentMore(unsigned amount) { m_indent_level += amount; } // Decrement the current indentation level -void Stream::IndentLess(int amount) { +void Stream::IndentLess(unsigned amount) { if (m_indent_level >= amount) m_indent_level -= amount; else From 3d5ba7c60f393096ab07880c65032d4b94880bc6 Mon Sep 17 00:00:00 2001 From: Tim Renouf Date: Wed, 27 Nov 2019 15:16:59 +0000 Subject: [PATCH 273/591] AMDGPU: Fixed indeterminate map iteration in SIPeepholeSDWA Differential Revision: https://reviews.llvm.org/D70783 Change-Id: Ic26f915a4acb4c00ecefa9d09d7c24cec370ed06 --- llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git 
a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index 9b3b2436475ce..05c81feb23ecd 100644 --- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -26,6 +26,7 @@ #include "SIRegisterInfo.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "Utils/AMDGPUBaseInfo.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" @@ -73,8 +74,8 @@ class SIPeepholeSDWA : public MachineFunctionPass { const SIRegisterInfo *TRI; const SIInstrInfo *TII; - std::unordered_map> SDWAOperands; - std::unordered_map PotentialMatches; + MapVector> SDWAOperands; + MapVector PotentialMatches; SmallVector ConvertedInstructions; Optional foldToImm(const MachineOperand &Op) const; From e19f19b09f83b1d64b9513a8f209773d4e21ad15 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Thu, 28 Nov 2019 14:12:09 +0300 Subject: [PATCH 274/591] [llvm-readobj/llvm-readelf] - Simplify the code that dumps versions. After changes introduced in D70495 and D70826 its now possible to significantly simplify the code we have. This also fixes an issue: previous code assumed that version strings should always be read from the dynamic string table. While it is normally true, the string table should be taken from the corresponding sh_link field. 
Differential revision: https://reviews.llvm.org/D70855 --- .../llvm-readobj/elf-verdef-invalid.test | 45 +++++ .../llvm-readobj/elf-verneed-invalid.test | 98 +++++++--- llvm/tools/llvm-readobj/ELFDumper.cpp | 176 +++++------------- 3 files changed, 166 insertions(+), 153 deletions(-) diff --git a/llvm/test/tools/llvm-readobj/elf-verdef-invalid.test b/llvm/test/tools/llvm-readobj/elf-verdef-invalid.test index 65e88119d0e56..253a2179008a0 100644 --- a/llvm/test/tools/llvm-readobj/elf-verdef-invalid.test +++ b/llvm/test/tools/llvm-readobj/elf-verdef-invalid.test @@ -262,3 +262,48 @@ Sections: Names: [] DynamicSymbols: - Name: foo + +## Check we error out when trying to print version symbols, but SHT_GNU_verdef is invalid due to any reason. + +# RUN: yaml2obj %s --docnum=10 -o %t10 +# RUN: not llvm-readobj -V %t10 2>&1 | FileCheck %s --check-prefix=INVALID-VERDEF-LLVM -DFILE=%t10 +# RUN: not llvm-readelf -V %t10 2>&1 | FileCheck %s --check-prefix=INVALID-VERDEF-GNU -DFILE=%t10 + +# INVALID-VERDEF-LLVM: VersionSymbols [ +# INVALID-VERDEF-LLVM-NEXT: Symbol { +# INVALID-VERDEF-LLVM-NEXT: Version: 0 +# INVALID-VERDEF-LLVM-NEXT: Name: +# INVALID-VERDEF-LLVM-NEXT: } +# INVALID-VERDEF-LLVM-NEXT: Symbol { +# INVALID-VERDEF-LLVM-EMPTY: +# INVALID-VERDEF-LLVM-NEXT: error: '[[FILE]]': invalid SHT_GNU_verdef section with index 2: version definition 1 goes past the end of the section + +# INVALID-VERDEF-GNU: Version symbols section '.gnu.version' contains 2 entries: +# INVALID-VERDEF-GNU-NEXT: Addr: 0000000000000000 Offset: 0x000040 Link: 5 (.dynsym) +# INVALID-VERDEF-GNU-NEXT: 000: 0 (*local*) +# INVALID-VERDEF-GNU-NEXT: error: '[[FILE]]': invalid SHT_GNU_verdef section with index 2: version definition 1 goes past the end of the section + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .gnu.version + Type: SHT_GNU_versym + Flags: [ SHF_ALLOC ] + Link: .dynsym + AddressAlign: 0x0000000000000002 + EntSize: 
0x0000000000000002 + Entries: [ 0, 2 ] + - Name: .gnu.version_d + Type: SHT_GNU_verdef + Flags: [ SHF_ALLOC ] + Link: .dynstr + AddressAlign: 0x4 + Info: 0x1 + Entries: [] +DynamicSymbols: + - Name: foo + Binding: STB_GLOBAL diff --git a/llvm/test/tools/llvm-readobj/elf-verneed-invalid.test b/llvm/test/tools/llvm-readobj/elf-verneed-invalid.test index 88039f89c593c..81976539bbcb5 100644 --- a/llvm/test/tools/llvm-readobj/elf-verneed-invalid.test +++ b/llvm/test/tools/llvm-readobj/elf-verneed-invalid.test @@ -86,11 +86,12 @@ DynamicSymbols: # GNU-NOLINK: Version symbols section '.gnu.version' contains 2 entries: # GNU-NOLINK-NEXT: Addr: 0000000000000000 Offset: 0x000040 Link: 5 (.dynsym) -# GNU-NOLINK-NEXT: 000: 0 (*local*) 2 (bar) +# GNU-NOLINK-NEXT: 000: 0 (*local*) +# GNU-NOLINK-NEXT: warning: '[[FILE]]': invalid string table linked to SHT_GNU_verneed section with index 2: invalid sh_type for string table section [index 0]: expected SHT_STRTAB, but got SHT_NULL +# GNU-NOLINK-NEXT: 2 () +# GNU-NOLINK-EMPTY: # GNU-NOLINK: Version needs section '.gnu.version_r' contains 1 entries: # GNU-NOLINK-NEXT: Addr: 0000000000000000 Offset: 0x000044 Link: 0 () -# GNU-NOLINK-EMPTY: -# GNU-NOLINK-NEXT: warning: '[[FILE]]': invalid string table linked to SHT_GNU_verneed section with index 2: invalid sh_type for string table section [index 0]: expected SHT_STRTAB, but got SHT_NULL # GNU-NOLINK-NEXT: 0x0000: Version: 1 File: Cnt: 1 # GNU-NOLINK-NEXT: 0x0010: Name: Flags: none Version: 2 @@ -100,14 +101,14 @@ DynamicSymbols: # LLVM-NOLINK-NEXT: Name: # LLVM-NOLINK-NEXT: } # LLVM-NOLINK-NEXT: Symbol { +# LLVM-NOLINK-EMPTY: +# LLVM-NOLINK-NEXT: warning: '[[FILE]]': invalid string table linked to SHT_GNU_verneed section with index 2: invalid sh_type for string table section [index 0]: expected SHT_STRTAB, but got SHT_NULL # LLVM-NOLINK-NEXT: Version: 2 -# LLVM-NOLINK-NEXT: Name: foo@bar +# LLVM-NOLINK-NEXT: Name: foo@ # LLVM-NOLINK-NEXT: } # LLVM-NOLINK-NEXT: ] # LLVM-NOLINK: 
VersionRequirements [ -# LLVM-NOLINK-EMPTY: -# LLVM-NOLINK-NEXT: warning: '[[FILE]]': invalid string table linked to SHT_GNU_verneed section with index 2: invalid sh_type for string table section [index 0]: expected SHT_STRTAB, but got SHT_NULL # LLVM-NOLINK-NEXT: Dependency { # LLVM-NOLINK-NEXT: Version: 1 # LLVM-NOLINK-NEXT: Count: 1 @@ -155,14 +156,12 @@ DynamicSymbols: Binding: STB_GLOBAL ## We can't parse misaligned auxiliary version records. -## Here we have a SHT_GNU_verneed section aligned by 1 byte. -## This makes the first auxiliary record offset % 4 be non-zero. # RUN: yaml2obj --docnum=3 %s -o %t3 # RUN: not llvm-readelf -V %t3 2>&1 | FileCheck %s -DFILE=%t3 --check-prefix=BROKEN-AUX # RUN: not llvm-readobj -V %t3 2>&1 | FileCheck %s -DFILE=%t3 --check-prefix=BROKEN-AUX -# BROKEN-AUX: error: '[[FILE]]': SHT_GNU_verneed: the vn_aux field of the entry with index 0 references a misaligned auxiliary record +# BROKEN-AUX: error: '[[FILE]]': invalid SHT_GNU_verneed section with index 2: found a misaligned auxiliary entry at offset 0x11 --- !ELF FileHeader: @@ -176,19 +175,14 @@ Sections: Flags: [ SHF_ALLOC ] Link: .dynsym Entries: [ 2 ] - - Name: .gnu.version_r - Type: SHT_GNU_verneed - Flags: [ SHF_ALLOC ] - Info: 1 - AddressAlign: 1 - Dependencies: - - Version: 1 - File: somefile - Entries: - - Name: 'bar' - Hash: 0 - Flags: 0 - Other: 2 + - Name: .gnu.version_r + Type: SHT_GNU_verneed + Flags: [ SHF_ALLOC ] + Info: 1 + Link: .dynstr + AddressAlign: 4 +## The byte offset to the auxiliary entry is 0x11, i.e. it is not correctly aligned in memory. + Content: "0100010001000000110000000000000000000000" DynamicSymbols: - Name: foo @@ -551,3 +545,63 @@ Sections: Other: 0 DynamicSymbols: - Name: foo + +## In this case SHT_GNU_verneed is linked to a custom dynamic string table, which is not +## called ".dynstr". Check we handle this case properly. 
+ +# RUN: yaml2obj --docnum=13 %s -o %t13 +# RUN: llvm-readelf -V %t13 2>&1 | FileCheck %s -DFILE=%t13 --check-prefix=GNU-CUSTOM-DYNSTR +# RUN: llvm-readobj -V %t13 2>&1 | FileCheck %s -DFILE=%t13 --check-prefix=LLVM-CUSTOM-DYNSTR + +# GNU-CUSTOM-DYNSTR: Version symbols section '.gnu.version' contains 2 entries: +# GNU-CUSTOM-DYNSTR-NEXT: Addr: 0000000000000000 Offset: 0x000040 Link: 6 (.dynsym) +# GNU-CUSTOM-DYNSTR-NEXT: 000: 0 (*local*) 2 (bcdefghij) +# GNU-CUSTOM-DYNSTR: Version needs section '.gnu.version_r' contains 1 entries: +# GNU-CUSTOM-DYNSTR-NEXT: Addr: 0000000000000000 Offset: 0x000044 Link: 3 (.custom.dynstr) +# GNU-CUSTOM-DYNSTR-NEXT: 0x0000: Version: 1 File: j Cnt: 1 +# GNU-CUSTOM-DYNSTR-NEXT: 0x0010: Name: bcdefghij Flags: none Version: 2 + +# LLVM-CUSTOM-DYNSTR: VersionSymbols [ +# LLVM-CUSTOM-DYNSTR: Symbol { +# LLVM-CUSTOM-DYNSTR: Version: 2 +# LLVM-CUSTOM-DYNSTR-NEXT: Name: foo@bcdefghij + +# LLVM-CUSTOM-DYNSTR: VersionRequirements [ +# LLVM-CUSTOM-DYNSTR: Dependency { +# LLVM-CUSTOM-DYNSTR: Entries [ +# LLVM-CUSTOM-DYNSTR: Entry { +# LLVM-CUSTOM-DYNSTR: Index: 2 +# LLVM-CUSTOM-DYNSTR-NEXT: Name: bcdefghij + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .gnu.version + Type: SHT_GNU_versym + Flags: [ SHF_ALLOC ] + Link: .dynsym + Entries: [ 0, 2 ] + - Name: .gnu.version_r + Type: SHT_GNU_verneed + Flags: [ SHF_ALLOC ] + Link: .custom.dynstr + Info: 1 + AddressAlign: 4 + Dependencies: + - Version: 1 + File: zed + Entries: + - Name: 'bar' + Hash: 0 + Flags: 0 + Other: 2 + - Name: .custom.dynstr + Type: SHT_STRTAB + Content: "6162636465666768696a00" ## 'a','b','c','d','e','f','g','h','i','j',NIL +DynamicSymbols: + - Name: foo + Binding: STB_GLOBAL diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index adc3ae7dcc833..8a9fd33c51687 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -258,11 
+258,8 @@ template class ELFDumper : public ObjDumper { void loadDynamicTable(const ELFFile *Obj); void parseDynamicTable(); - StringRef getSymbolVersion(StringRef StrTab, const Elf_Sym *symb, - bool &IsDefault) const; + StringRef getSymbolVersion(const Elf_Sym *symb, bool &IsDefault) const; void LoadVersionMap() const; - void LoadVersionNeeds(const Elf_Shdr *ec) const; - void LoadVersionDefs(const Elf_Shdr *sec) const; const object::ELFObjectFile *ObjF; DynRegionInfo DynRelRegion; @@ -285,29 +282,11 @@ template class ELFDumper : public ObjDumper { const Elf_Shdr *SymbolVersionNeedSection = nullptr; // .gnu.version_r const Elf_Shdr *SymbolVersionDefSection = nullptr; // .gnu.version_d - // Records for each version index the corresponding Verdef or Vernaux entry. - // This is filled the first time LoadVersionMap() is called. - class VersionMapEntry : public PointerIntPair { - public: - // If the integer is 0, this is an Elf_Verdef*. - // If the integer is 1, this is an Elf_Vernaux*. - VersionMapEntry() : PointerIntPair(nullptr, 0) {} - VersionMapEntry(const Elf_Verdef *verdef) - : PointerIntPair(verdef, 0) {} - VersionMapEntry(const Elf_Vernaux *vernaux) - : PointerIntPair(vernaux, 1) {} - - bool isNull() const { return getPointer() == nullptr; } - bool isVerdef() const { return !isNull() && getInt() == 0; } - bool isVernaux() const { return !isNull() && getInt() == 1; } - const Elf_Verdef *getVerdef() const { - return isVerdef() ? (const Elf_Verdef *)getPointer() : nullptr; - } - const Elf_Vernaux *getVernaux() const { - return isVernaux() ? 
(const Elf_Vernaux *)getPointer() : nullptr; - } + struct VersionEntry { + std::string Name; + bool IsVerDef; }; - mutable SmallVector VersionMap; + mutable SmallVector, 16> VersionMap; public: Elf_Dyn_Range dynamic_table() const { @@ -340,8 +319,7 @@ template class ELFDumper : public ObjDumper { unsigned SectionIndex) const; Expected getStaticSymbolName(uint32_t Index) const; std::string getDynamicString(uint64_t Value) const; - StringRef getSymbolVersionByIndex(StringRef StrTab, - uint32_t VersionSymbolIndex, + StringRef getSymbolVersionByIndex(uint32_t VersionSymbolIndex, bool &IsDefault) const; void printSymbolsHelper(bool IsDynamic) const; @@ -909,78 +887,6 @@ std::error_code createELFDumper(const object::ObjectFile *Obj, } // end namespace llvm -// Iterate through the versions needed section, and place each Elf_Vernaux -// in the VersionMap according to its index. -template -void ELFDumper::LoadVersionNeeds(const Elf_Shdr *Sec) const { - unsigned VerneedSize = Sec->sh_size; // Size of section in bytes - unsigned VerneedEntries = Sec->sh_info; // Number of Verneed entries - const uint8_t *VerneedStart = reinterpret_cast( - ObjF->getELFFile()->base() + Sec->sh_offset); - const uint8_t *VerneedEnd = VerneedStart + VerneedSize; - // The first Verneed entry is at the start of the section. 
- const uint8_t *VerneedBuf = VerneedStart; - for (unsigned VerneedIndex = 0; VerneedIndex < VerneedEntries; - ++VerneedIndex) { - if (VerneedBuf + sizeof(Elf_Verneed) > VerneedEnd) - report_fatal_error("Section ended unexpectedly while scanning " - "version needed records."); - const Elf_Verneed *Verneed = - reinterpret_cast(VerneedBuf); - if (Verneed->vn_version != ELF::VER_NEED_CURRENT) - report_fatal_error("Unexpected verneed version"); - // Iterate through the Vernaux entries - const uint8_t *VernauxBuf = VerneedBuf + Verneed->vn_aux; - for (unsigned VernauxIndex = 0; VernauxIndex < Verneed->vn_cnt; - ++VernauxIndex) { - if (VernauxBuf + sizeof(Elf_Vernaux) > VerneedEnd) - report_fatal_error("Section ended unexpected while scanning auxiliary " - "version needed records."); - if ((ptrdiff_t)VernauxBuf % sizeof(uint32_t) != 0) - reportError(createError("SHT_GNU_verneed: the vn_aux field of the " - "entry with index " + - Twine(VerneedIndex) + - " references a misaligned auxiliary record"), - ObjF->getFileName()); - - const Elf_Vernaux *Vernaux = - reinterpret_cast(VernauxBuf); - size_t Index = Vernaux->vna_other & ELF::VERSYM_VERSION; - if (Index >= VersionMap.size()) - VersionMap.resize(Index + 1); - VersionMap[Index] = VersionMapEntry(Vernaux); - VernauxBuf += Vernaux->vna_next; - } - VerneedBuf += Verneed->vn_next; - } -} - -// Iterate through the version definitions, and place each Elf_Verdef -// in the VersionMap according to its index. -template -void ELFDumper::LoadVersionDefs(const Elf_Shdr *Sec) const { - unsigned VerdefSize = Sec->sh_size; // Size of section in bytes - unsigned VerdefEntries = Sec->sh_info; // Number of Verdef entries - const uint8_t *VerdefStart = reinterpret_cast( - ObjF->getELFFile()->base() + Sec->sh_offset); - const uint8_t *VerdefEnd = VerdefStart + VerdefSize; - // The first Verdef entry is at the start of the section. 
- const uint8_t *VerdefBuf = VerdefStart; - for (unsigned VerdefIndex = 0; VerdefIndex < VerdefEntries; ++VerdefIndex) { - if (VerdefBuf + sizeof(Elf_Verdef) > VerdefEnd) - report_fatal_error("Section ended unexpectedly while scanning " - "version definitions."); - const Elf_Verdef *Verdef = reinterpret_cast(VerdefBuf); - if (Verdef->vd_version != ELF::VER_DEF_CURRENT) - report_fatal_error("Unexpected verdef version"); - size_t Index = Verdef->vd_ndx & ELF::VERSYM_VERSION; - if (Index >= VersionMap.size()) - VersionMap.resize(Index + 1); - VersionMap[Index] = VersionMapEntry(Verdef); - VerdefBuf += Verdef->vd_next; - } -} - template void ELFDumper::LoadVersionMap() const { // If there is no dynamic symtab or version table, there is nothing to do. if (!DynSymRegion.Addr || !SymbolVersionSection) @@ -992,19 +898,37 @@ template void ELFDumper::LoadVersionMap() const { // The first two version indexes are reserved. // Index 0 is LOCAL, index 1 is GLOBAL. - VersionMap.push_back(VersionMapEntry()); - VersionMap.push_back(VersionMapEntry()); + VersionMap.push_back(VersionEntry()); + VersionMap.push_back(VersionEntry()); - if (SymbolVersionDefSection) - LoadVersionDefs(SymbolVersionDefSection); + auto InsertEntry = [this](unsigned N, StringRef Version, bool IsVerdef) { + if (N >= VersionMap.size()) + VersionMap.resize(N + 1); + VersionMap[N] = {Version, IsVerdef}; + }; - if (SymbolVersionNeedSection) - LoadVersionNeeds(SymbolVersionNeedSection); + if (SymbolVersionDefSection) { + Expected> Defs = + this->getVersionDefinitions(SymbolVersionDefSection); + if (!Defs) + reportError(Defs.takeError(), ObjF->getFileName()); + for (const VerDef &Def : *Defs) + InsertEntry(Def.Ndx & ELF::VERSYM_VERSION, Def.Name, true); + } + + if (SymbolVersionNeedSection) { + Expected> Deps = + this->getVersionDependencies(SymbolVersionNeedSection); + if (!Deps) + reportError(Deps.takeError(), ObjF->getFileName()); + for (const VerNeed &Dep : *Deps) + for (const VernAux &Aux : Dep.AuxV) + 
InsertEntry(Aux.Other & ELF::VERSYM_VERSION, Aux.Name, false); + } } template -StringRef ELFDumper::getSymbolVersion(StringRef StrTab, - const Elf_Sym *Sym, +StringRef ELFDumper::getSymbolVersion(const Elf_Sym *Sym, bool &IsDefault) const { // This is a dynamic symbol. Look in the GNU symbol version table. if (!SymbolVersionSection) { @@ -1022,7 +946,7 @@ StringRef ELFDumper::getSymbolVersion(StringRef StrTab, const Elf_Versym *Versym = unwrapOrError( ObjF->getFileName(), ObjF->getELFFile()->template getEntry( SymbolVersionSection, EntryIndex)); - return this->getSymbolVersionByIndex(StrTab, Versym->vs_index, IsDefault); + return this->getSymbolVersionByIndex(Versym->vs_index, IsDefault); } static std::string maybeDemangle(StringRef Name) { @@ -1049,8 +973,7 @@ ELFDumper::getStaticSymbolName(uint32_t Index) const { } template -StringRef ELFDumper::getSymbolVersionByIndex(StringRef StrTab, - uint32_t SymbolVersionIndex, +StringRef ELFDumper::getSymbolVersionByIndex(uint32_t SymbolVersionIndex, bool &IsDefault) const { size_t VersionIndex = SymbolVersionIndex & VERSYM_VERSION; @@ -1062,23 +985,15 @@ StringRef ELFDumper::getSymbolVersionByIndex(StringRef StrTab, // Lookup this symbol in the version table. LoadVersionMap(); - if (VersionIndex >= VersionMap.size() || VersionMap[VersionIndex].isNull()) + if (VersionIndex >= VersionMap.size() || !VersionMap[VersionIndex]) reportError(createError("Invalid version entry"), ObjF->getFileName()); - const VersionMapEntry &Entry = VersionMap[VersionIndex]; - // Get the version name string. - size_t NameOffset; - if (Entry.isVerdef()) { - // The first Verdaux entry holds the name. 
- NameOffset = Entry.getVerdef()->getAux()->vda_name; + const VersionEntry &Entry = *VersionMap[VersionIndex]; + if (Entry.IsVerDef) IsDefault = !(SymbolVersionIndex & VERSYM_HIDDEN); - } else { - NameOffset = Entry.getVernaux()->vna_name; + else IsDefault = false; - } - if (NameOffset >= StrTab.size()) - reportError(createError("Invalid string offset"), ObjF->getFileName()); - return StrTab.data() + NameOffset; + return Entry.Name.c_str(); } template @@ -1109,7 +1024,7 @@ std::string ELFDumper::getFullSymbolName(const Elf_Sym *Symbol, return SymbolName; bool IsDefault; - StringRef Version = getSymbolVersion(StrTable, &*Symbol, IsDefault); + StringRef Version = getSymbolVersion(&*Symbol, IsDefault); if (!Version.empty()) { SymbolName += (IsDefault ? "@@" : "@"); SymbolName += Version; @@ -4100,7 +4015,6 @@ void GNUStyle::printVersionSymbolSection(const ELFFile *Obj, const uint8_t *VersymBuf = reinterpret_cast(Obj->base() + Sec->sh_offset); const ELFDumper *Dumper = this->dumper(); - StringRef StrTable = Dumper->getDynamicStringTable(); // readelf prints 4 entries per line. for (uint64_t VersymRow = 0; VersymRow < Entries; VersymRow += 4) { @@ -4119,17 +4033,17 @@ void GNUStyle::printVersionSymbolSection(const ELFFile *Obj, OS << " 1 (*global*) "; break; default: - OS << format("%4x%c", Versym->vs_index & VERSYM_VERSION, - Versym->vs_index & VERSYM_HIDDEN ? 'h' : ' '); - bool IsDefault = true; - std::string VersionName = Dumper->getSymbolVersionByIndex( - StrTable, Versym->vs_index, IsDefault); + std::string VersionName = + Dumper->getSymbolVersionByIndex(Versym->vs_index, IsDefault); if (!VersionName.empty()) VersionName = "(" + VersionName + ")"; else VersionName = "(*invalid*)"; + + OS << format("%4x%c", Versym->vs_index & VERSYM_VERSION, + Versym->vs_index & VERSYM_HIDDEN ? 
'h' : ' '); OS << left_justify(VersionName, 13); } VersymBuf += sizeof(Elf_Versym); From 76b70f6f75e90074bf2f5168383cc3afc21b61ad Mon Sep 17 00:00:00 2001 From: "Wang, Pengfei" Date: Mon, 2 Dec 2019 19:39:16 +0800 Subject: [PATCH 275/591] [X86] Add initialization of FPCW in llvm-exegesis Summary: This is a following up to D70874. It adds the initialization of FPCW in llvm-exegesis. Reviewers: craig.topper, RKSimon, courbet, gchatelet Subscribers: tschuett, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70891 --- .../test/tools/llvm-exegesis/X86/uops-ADD_F32m.s | 9 +++++++++ llvm/tools/llvm-exegesis/lib/X86/Target.cpp | 16 +++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) create mode 100644 llvm/test/tools/llvm-exegesis/X86/uops-ADD_F32m.s diff --git a/llvm/test/tools/llvm-exegesis/X86/uops-ADD_F32m.s b/llvm/test/tools/llvm-exegesis/X86/uops-ADD_F32m.s new file mode 100644 index 0000000000000..023fa78282ba6 --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/X86/uops-ADD_F32m.s @@ -0,0 +1,9 @@ +# RUN: llvm-exegesis -mode=uops -opcode-name=ADD_F32m -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mode=uops -opcode-name=ADD_F32m -repetition-mode=loop | FileCheck %s + +CHECK: mode: uops +CHECK-NEXT: key: +CHECK-NEXT: instructions: +CHECK-NEXT: ADD_F32m +CHECK: register_initial_values: +CHECK: FPCW diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp index 974b2c938c3ba..08724dd41526e 100644 --- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp @@ -439,7 +439,8 @@ struct ConstantInliner { std::vector popFlagAndFinalize(); - std::vector loadMXCSRAndFinalize(bool HasAVX); + std::vector loadImplicitRegAndFinalize(unsigned Opcode, + unsigned Value); private: ConstantInliner &add(const MCInst &Inst) { @@ -501,10 +502,11 @@ std::vector ConstantInliner::popFlagAndFinalize() { return std::move(Instructions); } -std::vector 
ConstantInliner::loadMXCSRAndFinalize(bool HasAVX) { +std::vector +ConstantInliner::loadImplicitRegAndFinalize(unsigned Opcode, unsigned Value) { add(allocateStackSpace(4)); - add(fillStackSpace(X86::MOV32mi, 0, 0x1f80)); // Mask all FP exceptions - add(MCInstBuilder(HasAVX ? X86::VLDMXCSR : X86::LDMXCSR) + add(fillStackSpace(X86::MOV32mi, 0, Value)); // Mask all FP exceptions + add(MCInstBuilder(Opcode) // Address = ESP .addReg(X86::RSP) // BaseReg .addImm(1) // ScaleAmt @@ -715,7 +717,11 @@ std::vector ExegesisX86Target::setRegTo(const MCSubtargetInfo &STI, if (Reg == X86::EFLAGS) return CI.popFlagAndFinalize(); if (Reg == X86::MXCSR) - return CI.loadMXCSRAndFinalize(STI.getFeatureBits()[X86::FeatureAVX]); + return CI.loadImplicitRegAndFinalize( + STI.getFeatureBits()[X86::FeatureAVX] ? X86::VLDMXCSR + : X86::LDMXCSR, 0x1f80); + if (Reg == X86::FPCW) + return CI.loadImplicitRegAndFinalize(X86::FLDCW16m, 0x37f); return {}; // Not yet implemented. } From 4f728bfc13c45bc744bfdbfc3086bed74a8cbb4c Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Sat, 30 Nov 2019 15:30:08 +0100 Subject: [PATCH 276/591] [lldb][NFC] Use raw_ostream instead of Stream in Baton::GetDescription Removing raw_ostream here is getting us closer to removing LLDB's Stream class. 
--- .../lldb/Breakpoint/BreakpointOptions.h | 3 +- .../lldb/Breakpoint/WatchpointOptions.h | 3 +- lldb/include/lldb/Utility/Baton.h | 13 +++++-- lldb/source/Breakpoint/BreakpointOptions.cpp | 37 +++++++++---------- lldb/source/Breakpoint/WatchpointOptions.cpp | 28 +++++++------- .../CommandObjectBreakpointCommand.cpp | 8 ++-- .../CommandObjectWatchpointCommand.cpp | 8 ++-- lldb/source/Utility/Baton.cpp | 5 ++- 8 files changed, 56 insertions(+), 49 deletions(-) diff --git a/lldb/include/lldb/Breakpoint/BreakpointOptions.h b/lldb/include/lldb/Breakpoint/BreakpointOptions.h index 9e02afff5227e..2c52170eb9f6a 100644 --- a/lldb/include/lldb/Breakpoint/BreakpointOptions.h +++ b/lldb/include/lldb/Breakpoint/BreakpointOptions.h @@ -88,7 +88,8 @@ friend class Breakpoint; explicit CommandBaton(std::unique_ptr Data) : TypedBaton(std::move(Data)) {} - void GetDescription(Stream *s, lldb::DescriptionLevel level) const override; + void GetDescription(llvm::raw_ostream &s, lldb::DescriptionLevel level, + unsigned indentation) const override; }; typedef std::shared_ptr CommandBatonSP; diff --git a/lldb/include/lldb/Breakpoint/WatchpointOptions.h b/lldb/include/lldb/Breakpoint/WatchpointOptions.h index b395dde21901e..0dc34d4ebef73 100644 --- a/lldb/include/lldb/Breakpoint/WatchpointOptions.h +++ b/lldb/include/lldb/Breakpoint/WatchpointOptions.h @@ -180,7 +180,8 @@ class WatchpointOptions { CommandBaton(std::unique_ptr Data) : TypedBaton(std::move(Data)) {} - void GetDescription(Stream *s, lldb::DescriptionLevel level) const override; + void GetDescription(llvm::raw_ostream &s, lldb::DescriptionLevel level, + unsigned indentation) const override; }; protected: diff --git a/lldb/include/lldb/Utility/Baton.h b/lldb/include/lldb/Utility/Baton.h index 4050f2af2bf04..c42867489c65d 100644 --- a/lldb/include/lldb/Utility/Baton.h +++ b/lldb/include/lldb/Utility/Baton.h @@ -12,6 +12,8 @@ #include "lldb/lldb-enumerations.h" #include "lldb/lldb-public.h" +#include "llvm/Support/raw_ostream.h" 
+ #include namespace lldb_private { @@ -37,8 +39,9 @@ class Baton { virtual void *data() = 0; - virtual void GetDescription(Stream *s, - lldb::DescriptionLevel level) const = 0; + virtual void GetDescription(llvm::raw_ostream &s, + lldb::DescriptionLevel level, + unsigned indentation) const = 0; }; class UntypedBaton : public Baton { @@ -50,7 +53,8 @@ class UntypedBaton : public Baton { } void *data() override { return m_data; } - void GetDescription(Stream *s, lldb::DescriptionLevel level) const override; + void GetDescription(llvm::raw_ostream &s, lldb::DescriptionLevel level, + unsigned indentation) const override; void *m_data; // Leave baton public for easy access }; @@ -63,7 +67,8 @@ template class TypedBaton : public Baton { const T *getItem() const { return Item.get(); } void *data() override { return Item.get(); } - void GetDescription(Stream *s, lldb::DescriptionLevel level) const override {} + void GetDescription(llvm::raw_ostream &s, lldb::DescriptionLevel level, + unsigned indentation) const override {} protected: std::unique_ptr Item; diff --git a/lldb/source/Breakpoint/BreakpointOptions.cpp b/lldb/source/Breakpoint/BreakpointOptions.cpp index 0d4c6173c3c54..8fd16f420c04f 100644 --- a/lldb/source/Breakpoint/BreakpointOptions.cpp +++ b/lldb/source/Breakpoint/BreakpointOptions.cpp @@ -566,7 +566,8 @@ void BreakpointOptions::GetDescription(Stream *s, if (m_callback_baton_sp.get()) { if (level != eDescriptionLevelBrief) { s->EOL(); - m_callback_baton_sp->GetDescription(s, level); + m_callback_baton_sp->GetDescription(s->AsRawOstream(), level, + s->GetIndentLevel()); } } if (!m_condition_text.empty()) { @@ -578,35 +579,33 @@ void BreakpointOptions::GetDescription(Stream *s, } void BreakpointOptions::CommandBaton::GetDescription( - Stream *s, lldb::DescriptionLevel level) const { + llvm::raw_ostream &s, lldb::DescriptionLevel level, + unsigned indentation) const { const CommandData *data = getItem(); if (level == eDescriptionLevelBrief) { - s->Printf(", 
commands = %s", - (data && data->user_source.GetSize() > 0) ? "yes" : "no"); + s << ", commands = " + << ((data && data->user_source.GetSize() > 0) ? "yes" : "no"); return; } - s->IndentMore(); - s->Indent("Breakpoint commands"); + indentation += 2; + s.indent(indentation); + s << "Breakpoint commands"; if (data->interpreter != eScriptLanguageNone) - s->Printf(" (%s):\n", - ScriptInterpreter::LanguageToString(data->interpreter).c_str()); + s << llvm::formatv(" ({0}):\n", + ScriptInterpreter::LanguageToString(data->interpreter)); else - s->PutCString(":\n"); + s << ":\n"; - s->IndentMore(); + indentation += 2; if (data && data->user_source.GetSize() > 0) { - const size_t num_strings = data->user_source.GetSize(); - for (size_t i = 0; i < num_strings; ++i) { - s->Indent(data->user_source.GetStringAtIndex(i)); - s->EOL(); + for (llvm::StringRef str : data->user_source) { + s.indent(indentation); + s << str << "\n"; } - } else { - s->PutCString("No commands.\n"); - } - s->IndentLess(); - s->IndentLess(); + } else + s << "No commands.\n"; } void BreakpointOptions::SetCommandDataCallback( diff --git a/lldb/source/Breakpoint/WatchpointOptions.cpp b/lldb/source/Breakpoint/WatchpointOptions.cpp index cd5ef930e5dcf..026bf2f746aef 100644 --- a/lldb/source/Breakpoint/WatchpointOptions.cpp +++ b/lldb/source/Breakpoint/WatchpointOptions.cpp @@ -121,7 +121,8 @@ void WatchpointOptions::GetCallbackDescription( Stream *s, lldb::DescriptionLevel level) const { if (m_callback_baton_sp.get()) { s->EOL(); - m_callback_baton_sp->GetDescription(s, level); + m_callback_baton_sp->GetDescription(s->AsRawOstream(), level, + s->GetIndentLevel()); } } @@ -156,27 +157,26 @@ void WatchpointOptions::GetDescription(Stream *s, } void WatchpointOptions::CommandBaton::GetDescription( - Stream *s, lldb::DescriptionLevel level) const { + llvm::raw_ostream &s, lldb::DescriptionLevel level, + unsigned indentation) const { const CommandData *data = getItem(); if (level == eDescriptionLevelBrief) { - 
s->Printf(", commands = %s", - (data && data->user_source.GetSize() > 0) ? "yes" : "no"); + s << ", commands = %s" + << ((data && data->user_source.GetSize() > 0) ? "yes" : "no"); return; } - s->IndentMore(); - s->Indent("watchpoint commands:\n"); + indentation += 2; + s.indent(indentation); + s << "watchpoint commands:\n"; - s->IndentMore(); + indentation += 2; if (data && data->user_source.GetSize() > 0) { for (const std::string &line : data->user_source) { - s->Indent(line); - s->EOL(); + s.indent(indentation); + s << line << "\n"; } - } else { - s->PutCString("No commands.\n"); - } - s->IndentLess(); - s->IndentLess(); + } else + s << "No commands.\n"; } diff --git a/lldb/source/Commands/CommandObjectBreakpointCommand.cpp b/lldb/source/Commands/CommandObjectBreakpointCommand.cpp index 1a4432149f731..a82e70a1cdaba 100644 --- a/lldb/source/Commands/CommandObjectBreakpointCommand.cpp +++ b/lldb/source/Commands/CommandObjectBreakpointCommand.cpp @@ -674,10 +674,10 @@ class CommandObjectBreakpointCommandList : public CommandObjectParsed { if (baton) { result.GetOutputStream().Printf("Breakpoint %s:\n", id_str.GetData()); - result.GetOutputStream().IndentMore(); - baton->GetDescription(&result.GetOutputStream(), - eDescriptionLevelFull); - result.GetOutputStream().IndentLess(); + baton->GetDescription(result.GetOutputStream().AsRawOstream(), + eDescriptionLevelFull, + result.GetOutputStream().GetIndentLevel() + + 2); } else { result.AppendMessageWithFormat( "Breakpoint %s does not have an associated command.\n", diff --git a/lldb/source/Commands/CommandObjectWatchpointCommand.cpp b/lldb/source/Commands/CommandObjectWatchpointCommand.cpp index 5683381efc858..92a91cfac2208 100644 --- a/lldb/source/Commands/CommandObjectWatchpointCommand.cpp +++ b/lldb/source/Commands/CommandObjectWatchpointCommand.cpp @@ -611,10 +611,10 @@ class CommandObjectWatchpointCommandList : public CommandObjectParsed { const Baton *baton = wp_options->GetBaton(); if (baton) { 
result.GetOutputStream().Printf("Watchpoint %u:\n", cur_wp_id); - result.GetOutputStream().IndentMore(); - baton->GetDescription(&result.GetOutputStream(), - eDescriptionLevelFull); - result.GetOutputStream().IndentLess(); + baton->GetDescription(result.GetOutputStream().AsRawOstream(), + eDescriptionLevelFull, + result.GetOutputStream().GetIndentLevel() + + 2); } else { result.AppendMessageWithFormat( "Watchpoint %u does not have an associated command.\n", diff --git a/lldb/source/Utility/Baton.cpp b/lldb/source/Utility/Baton.cpp index 84e295e246864..7bba10dcec962 100644 --- a/lldb/source/Utility/Baton.cpp +++ b/lldb/source/Utility/Baton.cpp @@ -8,5 +8,6 @@ #include "lldb/Utility/Baton.h" -void lldb_private::UntypedBaton::GetDescription( - Stream *s, lldb::DescriptionLevel level) const {} +void lldb_private::UntypedBaton::GetDescription(llvm::raw_ostream &s, + lldb::DescriptionLevel level, + unsigned indentation) const {} From d62026e2dde1d27c7d1c702f11b0464e1d470d4f Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Mon, 2 Dec 2019 14:34:51 +0100 Subject: [PATCH 277/591] [lldb][NFC] Don't calculate member indices in DWARFASTParserClang::ParseChildMembers We keep counting members and then don't do anything with the computed result. --- lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 43030c62cb407..ca1db03b02fa2 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -2465,7 +2465,6 @@ bool DWARFASTParserClang::ParseChildMembers( const uint64_t parent_bit_size = parent_byte_size == UINT64_MAX ? 
UINT64_MAX : parent_byte_size * 8; - uint32_t member_idx = 0; BitfieldInfo last_field_info; ModuleSP module_sp = parent_die.GetDWARF()->GetObjectFile()->GetModule(); @@ -2935,7 +2934,6 @@ bool DWARFASTParserClang::ParseChildMembers( } } } - ++member_idx; } break; case DW_TAG_subprogram: From 62364965619bd7e8847418b21ec327a78bd1624c Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Mon, 2 Dec 2019 14:20:15 +0000 Subject: [PATCH 278/591] [OpenCL] Fix address space for implicit conversion (PR43145) Clang was creating a DerivedToBase ImplicitCastExpr that was also casting between address spaces as part of the second step in the standard conversion sequence. Defer the address space conversion to the third step in the sequence instead, such that we get a separate ImplicitCastExpr for the address space conversion. Differential Revision: https://reviews.llvm.org/D70605 --- clang/lib/Sema/SemaExprCXX.cpp | 23 ++++++++++++++++--- .../addrspace-derived-base.cl | 11 +++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 67492a2cd4633..9e5e49fa0f931 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -4095,9 +4095,26 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType, << From->getSourceRange(); } + // Defer address space conversion to the third conversion. 
+ QualType FromPteeType = From->getType()->getPointeeType(); + QualType ToPteeType = ToType->getPointeeType(); + QualType NewToType = ToType; + if (!FromPteeType.isNull() && !ToPteeType.isNull() && + FromPteeType.getAddressSpace() != ToPteeType.getAddressSpace()) { + NewToType = Context.removeAddrSpaceQualType(ToPteeType); + NewToType = Context.getAddrSpaceQualType(NewToType, + FromPteeType.getAddressSpace()); + if (ToType->isObjCObjectPointerType()) + NewToType = Context.getObjCObjectPointerType(NewToType); + else if (ToType->isBlockPointerType()) + NewToType = Context.getBlockPointerType(NewToType); + else + NewToType = Context.getPointerType(NewToType); + } + CastKind Kind; CXXCastPath BasePath; - if (CheckPointerConversion(From, ToType, Kind, BasePath, CStyle)) + if (CheckPointerConversion(From, NewToType, Kind, BasePath, CStyle)) return ExprError(); // Make sure we extend blocks if necessary. @@ -4108,8 +4125,8 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType, From = E.get(); } if (getLangOpts().allowsNonTrivialObjCLifetimeQualifiers()) - CheckObjCConversion(SourceRange(), ToType, From, CCK); - From = ImpCastExprToType(From, ToType, Kind, VK_RValue, &BasePath, CCK) + CheckObjCConversion(SourceRange(), NewToType, From, CCK); + From = ImpCastExprToType(From, NewToType, Kind, VK_RValue, &BasePath, CCK) .get(); break; } diff --git a/clang/test/CodeGenOpenCLCXX/addrspace-derived-base.cl b/clang/test/CodeGenOpenCLCXX/addrspace-derived-base.cl index d5d369fa80bb6..623d201c21800 100644 --- a/clang/test/CodeGenOpenCLCXX/addrspace-derived-base.cl +++ b/clang/test/CodeGenOpenCLCXX/addrspace-derived-base.cl @@ -69,3 +69,14 @@ void pr43145_3(int n) { // CHECK: bitcast i8 addrspace(4)* %add.ptr1 to %class.B2 addrspace(4)* // CHECK: call {{.*}} @_ZNU3AS42B26getRefEv } + +// Implicit conversion of derived to base. 
+ +void functionWithBaseArgPtr(class B2 *b) {} +void functionWithBaseArgRef(class B2 &b) {} + +void pr43145_4() { + Derived d; + functionWithBaseArgPtr(&d); + functionWithBaseArgRef(d); +} From af0babc90a5c35cbecce4cc4a27e0396fe03f588 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 2 Dec 2019 09:21:59 -0500 Subject: [PATCH 279/591] [InstCombine] fold copysign with constant sign argument to (fneg+)fabs If the sign of the sign argument is known (this could be extended to use ValueTracking), then we can use fneg+fabs to clear/set the sign bit of the magnitude argument. http://llvm.org/docs/LangRef.html#llvm-copysign-intrinsic This transform is already done in DAGCombiner, but we can do it sooner in IR as suggested in PR44153: https://bugs.llvm.org/show_bug.cgi?id=44153 We have effectively no analysis for copysign in IR, so we are taking the unusual step of increasing the number of IR instructions for the negative constant case. Differential Revision: https://reviews.llvm.org/D70792 --- .../InstCombine/InstCombineCalls.cpp | 15 +++++++++++++++ llvm/test/Transforms/InstCombine/copysign.ll | 18 ++++++++++-------- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 23ca03ff68b0d..536e84b4a35ee 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2279,6 +2279,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::copysign: { + const APFloat *C; + if (match(II->getArgOperand(1), m_APFloat(C))) { + // If we know the sign bit of the sign argument, reduce to FABS/FNABS: + // copysign X, PosC --> fabs X + // copysign X, NegC --> fneg (fabs X) + Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, + II->getArgOperand(0), II); + if (C->isNegative()) + Fabs = Builder.CreateFNegFMF(Fabs, II); + + return replaceInstUsesWith(*II, Fabs); + 
} + break; + } case Intrinsic::fabs: { Value *Cond; Constant *LHS, *RHS; diff --git a/llvm/test/Transforms/InstCombine/copysign.ll b/llvm/test/Transforms/InstCombine/copysign.ll index 5d30f346807df..49ca1b87d0de9 100644 --- a/llvm/test/Transforms/InstCombine/copysign.ll +++ b/llvm/test/Transforms/InstCombine/copysign.ll @@ -6,8 +6,8 @@ declare <3 x double> @llvm.copysign.v3f64(<3 x double>, <3 x double>) define float @positive_sign_arg(float %x) { ; CHECK-LABEL: @positive_sign_arg( -; CHECK-NEXT: [[R:%.*]] = call arcp float @llvm.copysign.f32(float [[X:%.*]], float 0.000000e+00) -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call arcp float @llvm.fabs.f32(float [[X:%.*]]) +; CHECK-NEXT: ret float [[TMP1]] ; %r = call arcp float @llvm.copysign.f32(float %x, float 0.0) ret float %r @@ -15,8 +15,8 @@ define float @positive_sign_arg(float %x) { define <3 x double> @positive_sign_arg_vec_splat(<3 x double> %x) { ; CHECK-LABEL: @positive_sign_arg_vec_splat( -; CHECK-NEXT: [[R:%.*]] = call ninf <3 x double> @llvm.copysign.v3f64(<3 x double> [[X:%.*]], <3 x double> ) -; CHECK-NEXT: ret <3 x double> [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call ninf <3 x double> @llvm.fabs.v3f64(<3 x double> [[X:%.*]]) +; CHECK-NEXT: ret <3 x double> [[TMP1]] ; %r = call ninf <3 x double> @llvm.copysign.v3f64(<3 x double> %x, <3 x double> ) ret <3 x double> %r @@ -24,8 +24,9 @@ define <3 x double> @positive_sign_arg_vec_splat(<3 x double> %x) { define float @negative_sign_arg(float %x) { ; CHECK-LABEL: @negative_sign_arg( -; CHECK-NEXT: [[R:%.*]] = call nnan float @llvm.copysign.f32(float [[X:%.*]], float -0.000000e+00) -; CHECK-NEXT: ret float [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call nnan float @llvm.fabs.f32(float [[X:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = fneg nnan float [[TMP1]] +; CHECK-NEXT: ret float [[TMP2]] ; %r = call nnan float @llvm.copysign.f32(float %x, float -0.0) ret float %r @@ -33,8 +34,9 @@ define float @negative_sign_arg(float %x) { define <3 x double> 
@negative_sign_arg_vec_splat(<3 x double> %x) { ; CHECK-LABEL: @negative_sign_arg_vec_splat( -; CHECK-NEXT: [[R:%.*]] = call fast <3 x double> @llvm.copysign.v3f64(<3 x double> [[X:%.*]], <3 x double> ) -; CHECK-NEXT: ret <3 x double> [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call fast <3 x double> @llvm.fabs.v3f64(<3 x double> [[X:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = fneg fast <3 x double> [[TMP1]] +; CHECK-NEXT: ret <3 x double> [[TMP2]] ; %r = call fast <3 x double> @llvm.copysign.v3f64(<3 x double> %x, <3 x double> ) ret <3 x double> %r From dcf11c5e86cee94ec649a7a31c5dd259f60579d6 Mon Sep 17 00:00:00 2001 From: Victor Campos Date: Mon, 2 Dec 2019 12:13:04 +0000 Subject: [PATCH 280/591] [ARM][AArch64] Complex addition Neon intrinsics for Armv8.3-A Summary: Add support for vcadd_* family of intrinsics. This set of intrinsics is available in Armv8.3-A. The fp16 versions require the FP16 extension, which has been available (opt-in) since Armv8.2-A. Reviewers: t.p.northover Reviewed By: t.p.northover Subscribers: t.p.northover, kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D70862 --- clang/include/clang/Basic/arm_neon.td | 18 ++++++ clang/lib/Basic/Targets/AArch64.cpp | 1 + clang/lib/Basic/Targets/ARM.cpp | 12 ++++ clang/lib/Basic/Targets/ARM.h | 3 +- clang/lib/CodeGen/CGBuiltin.cpp | 8 +++ clang/test/CodeGen/aarch64-neon-vcadd.c | 63 +++++++++++++++++++ clang/test/CodeGen/arm-neon-vcadd.c | 51 ++++++++++++++++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 4 ++ llvm/include/llvm/IR/IntrinsicsARM.td | 4 ++ llvm/lib/Target/AArch64/AArch64InstrInfo.td | 23 +++++++ llvm/lib/Target/ARM/ARMInstrNEON.td | 21 +++++++ llvm/test/CodeGen/AArch64/neon-vcadd.ll | 67 +++++++++++++++++++++ llvm/test/CodeGen/ARM/neon-vcadd.ll | 54 +++++++++++++++++ 13 files changed, 328 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGen/aarch64-neon-vcadd.c create mode 100644 clang/test/CodeGen/arm-neon-vcadd.c 
create mode 100644 llvm/test/CodeGen/AArch64/neon-vcadd.ll create mode 100644 llvm/test/CodeGen/ARM/neon-vcadd.ll diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td index b5e395c8103f1..a4dc21b643110 100644 --- a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -1673,3 +1673,21 @@ let ArchGuard = "defined(__ARM_FEATURE_FP16FML) && defined(__aarch64__)" in { def VFMLAL_LANEQ_HIGH : SOpInst<"vfmlal_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLAL_LN_Hi>; def VFMLSL_LANEQ_HIGH : SOpInst<"vfmlsl_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN_Hi>; } + +// v8.3-A Vector complex addition intrinsics +let ArchGuard = "defined(__ARM_FEATURE_COMPLEX) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in { + def VCADD_ROT90_FP16 : SInst<"vcadd_rot90", "...", "h">; + def VCADD_ROT270_FP16 : SInst<"vcadd_rot270", "...", "h">; + def VCADDQ_ROT90_FP16 : SInst<"vcaddq_rot90", "QQQ", "h">; + def VCADDQ_ROT270_FP16 : SInst<"vcaddq_rot270", "QQQ", "h">; +} +let ArchGuard = "defined(__ARM_FEATURE_COMPLEX)" in { + def VCADD_ROT90 : SInst<"vcadd_rot90", "...", "f">; + def VCADD_ROT270 : SInst<"vcadd_rot270", "...", "f">; + def VCADDQ_ROT90 : SInst<"vcaddq_rot90", "QQQ", "f">; + def VCADDQ_ROT270 : SInst<"vcaddq_rot270", "QQQ", "f">; +} +let ArchGuard = "defined(__ARM_FEATURE_COMPLEX) && defined(__aarch64__)" in { + def VCADDQ_ROT90_FP64 : SInst<"vcaddq_rot90", "QQQ", "d">; + def VCADDQ_ROT270_FP64 : SInst<"vcaddq_rot270", "QQQ", "d">; +} \ No newline at end of file diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 5214f7c30ee0a..cba3e3ada7ea5 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -158,6 +158,7 @@ void AArch64TargetInfo::getTargetDefinesARMV82A(const LangOptions &Opts, void AArch64TargetInfo::getTargetDefinesARMV83A(const LangOptions &Opts, MacroBuilder &Builder) const { + Builder.defineMacro("__ARM_FEATURE_COMPLEX", 
"1"); Builder.defineMacro("__ARM_FEATURE_JCVT", "1"); // Also include the Armv8.2 defines getTargetDefinesARMV82A(Opts, Builder); diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp index 437a77afdc998..be088e81cffe4 100644 --- a/clang/lib/Basic/Targets/ARM.cpp +++ b/clang/lib/Basic/Targets/ARM.cpp @@ -580,6 +580,13 @@ void ARMTargetInfo::getTargetDefinesARMV82A(const LangOptions &Opts, getTargetDefinesARMV81A(Opts, Builder); } +void ARMTargetInfo::getTargetDefinesARMV83A(const LangOptions &Opts, + MacroBuilder &Builder) const { + // Also include the ARMv8.2-A defines + Builder.defineMacro("__ARM_FEATURE_COMPLEX", "1"); + getTargetDefinesARMV82A(Opts, Builder); +} + void ARMTargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { // Target identification. @@ -809,6 +816,11 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts, case llvm::ARM::ArchKind::ARMV8_2A: getTargetDefinesARMV82A(Opts, Builder); break; + case llvm::ARM::ArchKind::ARMV8_3A: + case llvm::ARM::ArchKind::ARMV8_4A: + case llvm::ARM::ArchKind::ARMV8_5A: + getTargetDefinesARMV83A(Opts, Builder); + break; } } diff --git a/clang/lib/Basic/Targets/ARM.h b/clang/lib/Basic/Targets/ARM.h index ce87a6265934b..9696a44045891 100644 --- a/clang/lib/Basic/Targets/ARM.h +++ b/clang/lib/Basic/Targets/ARM.h @@ -148,9 +148,10 @@ class LLVM_LIBRARY_VISIBILITY ARMTargetInfo : public TargetInfo { void getTargetDefinesARMV81A(const LangOptions &Opts, MacroBuilder &Builder) const; - void getTargetDefinesARMV82A(const LangOptions &Opts, MacroBuilder &Builder) const; + void getTargetDefinesARMV83A(const LangOptions &Opts, + MacroBuilder &Builder) const; void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 26044f53e4965..68706d78cd111 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4454,6 +4454,10 @@ 
static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0), NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType), NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType), + NEONMAP1(vcadd_rot270_v, arm_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcadd_rot90_v, arm_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcaddq_rot270_v, arm_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcaddq_rot90_v, arm_neon_vcadd_rot90, Add1ArgType), NEONMAP1(vcage_v, arm_neon_vacge, 0), NEONMAP1(vcageq_v, arm_neon_vacge, 0), NEONMAP1(vcagt_v, arm_neon_vacgt, 0), @@ -4727,6 +4731,10 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0), NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0), NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0), + NEONMAP1(vcadd_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcadd_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType), + NEONMAP1(vcaddq_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType), + NEONMAP1(vcaddq_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType), NEONMAP1(vcage_v, aarch64_neon_facge, 0), NEONMAP1(vcageq_v, aarch64_neon_facge, 0), NEONMAP1(vcagt_v, aarch64_neon_facgt, 0), diff --git a/clang/test/CodeGen/aarch64-neon-vcadd.c b/clang/test/CodeGen/aarch64-neon-vcadd.c new file mode 100644 index 0000000000000..6f1b3dcd40158 --- /dev/null +++ b/clang/test/CodeGen/aarch64-neon-vcadd.c @@ -0,0 +1,63 @@ +// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.3-a+fp16 %s -S -emit-llvm -o - | FileCheck %s + +#include + +void foo16x4_rot90(float16x4_t a, float16x4_t b) +{ +// CHECK: call <4 x half> @llvm.aarch64.neon.vcadd.rot90.v4f16 + float16x4_t result = vcadd_rot90_f16(a, b); +} + +void foo32x2_rot90(float32x2_t a, float32x2_t b) +{ +// CHECK: call <2 x float> @llvm.aarch64.neon.vcadd.rot90.v2f32 + float32x2_t result = vcadd_rot90_f32(a, b); +} + +void foo16x8_rot90(float16x8_t a, float16x8_t b) +{ +// CHECK: call <8 x half> @llvm.aarch64.neon.vcadd.rot90.v8f16 + float16x8_t result 
= vcaddq_rot90_f16(a, b); +} + +void foo32x4_rot90(float32x4_t a, float32x4_t b) +{ +// CHECK: call <4 x float> @llvm.aarch64.neon.vcadd.rot90.v4f32 + float32x4_t result = vcaddq_rot90_f32(a, b); +} + +void foo64x2_rot90(float64x2_t a, float64x2_t b) +{ +// CHECK: call <2 x double> @llvm.aarch64.neon.vcadd.rot90.v2f64 + float64x2_t result = vcaddq_rot90_f64(a, b); +} + +void foo16x4_rot270(float16x4_t a, float16x4_t b) +{ +// CHECK: call <4 x half> @llvm.aarch64.neon.vcadd.rot270.v4f16 + float16x4_t result = vcadd_rot270_f16(a, b); +} + +void foo32x2_rot270(float32x2_t a, float32x2_t b) +{ +// CHECK: call <2 x float> @llvm.aarch64.neon.vcadd.rot270.v2f32 + float32x2_t result = vcadd_rot270_f32(a, b); +} + +void foo16x8_rot270(float16x8_t a, float16x8_t b) +{ +// CHECK: call <8 x half> @llvm.aarch64.neon.vcadd.rot270.v8f16 + float16x8_t result = vcaddq_rot270_f16(a, b); +} + +void foo32x4_rot270(float32x4_t a, float32x4_t b) +{ +// CHECK: call <4 x float> @llvm.aarch64.neon.vcadd.rot270.v4f32 + float32x4_t result = vcaddq_rot270_f32(a, b); +} + +void foo64x2_rot270(float64x2_t a, float64x2_t b) +{ +// CHECK: call <2 x double> @llvm.aarch64.neon.vcadd.rot270.v2f64 + float64x2_t result = vcaddq_rot270_f64(a, b); +} diff --git a/clang/test/CodeGen/arm-neon-vcadd.c b/clang/test/CodeGen/arm-neon-vcadd.c new file mode 100644 index 0000000000000..4b9cf34c486ca --- /dev/null +++ b/clang/test/CodeGen/arm-neon-vcadd.c @@ -0,0 +1,51 @@ +// RUN: %clang -target arm-arm-none-eabi -march=armv8.3-a+fp16 %s -S -emit-llvm -o - | opt -S -sroa | FileCheck %s + +#include + +void foo16x4_rot90(float16x4_t a, float16x4_t b) +{ +// CHECK: call <4 x half> @llvm.arm.neon.vcadd.rot90.v4f16 + float16x4_t result = vcadd_rot90_f16(a, b); +} + +void foo32x2_rot90(float32x2_t a, float32x2_t b) +{ +// CHECK: call <2 x float> @llvm.arm.neon.vcadd.rot90.v2f32 + float32x2_t result = vcadd_rot90_f32(a, b); +} + +void foo16x8_rot90(float16x8_t a, float16x8_t b) +{ +// CHECK: call <8 x half> 
@llvm.arm.neon.vcadd.rot90.v8f16 + float16x8_t result = vcaddq_rot90_f16(a, b); +} + +void foo32x4_rot90(float32x4_t a, float32x4_t b) +{ +// CHECK: call <4 x float> @llvm.arm.neon.vcadd.rot90.v4f32 + float32x4_t result = vcaddq_rot90_f32(a, b); +} + +void foo16x4_rot270(float16x4_t a, float16x4_t b) +{ +// CHECK: call <4 x half> @llvm.arm.neon.vcadd.rot270.v4f16 + float16x4_t result = vcadd_rot270_f16(a, b); +} + +void foo32x2_rot270(float32x2_t a, float32x2_t b) +{ +// CHECK: call <2 x float> @llvm.arm.neon.vcadd.rot270.v2f32 + float32x2_t result = vcadd_rot270_f32(a, b); +} + +void foo16x8_rot270(float16x8_t a, float16x8_t b) +{ +// CHECK: call <8 x half> @llvm.arm.neon.vcadd.rot270.v8f16 + float16x8_t result = vcaddq_rot270_f16(a, b); +} + +void foo32x4_rot270(float32x4_t a, float32x4_t b) +{ +// CHECK: call <4 x float> @llvm.arm.neon.vcadd.rot270.v4f32 + float32x4_t result = vcaddq_rot270_f32(a, b); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index c74b17c988959..d22993300eb2f 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -446,6 +446,10 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in { def int_aarch64_neon_fmlsl : AdvSIMD_FP16FML_Intrinsic; def int_aarch64_neon_fmlal2 : AdvSIMD_FP16FML_Intrinsic; def int_aarch64_neon_fmlsl2 : AdvSIMD_FP16FML_Intrinsic; + + // v8.3-A Floating-point complex add + def int_aarch64_neon_vcadd_rot90 : AdvSIMD_2VectorArg_Intrinsic; + def int_aarch64_neon_vcadd_rot270 : AdvSIMD_2VectorArg_Intrinsic; } let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". 
diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index 4d4bc5e2d82d0..337110b4917b4 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -778,6 +778,10 @@ def int_arm_vctp16 : Intrinsic<[llvm_v8i1_ty], [llvm_i32_ty], [IntrNoMem]>; def int_arm_vctp32 : Intrinsic<[llvm_v4i1_ty], [llvm_i32_ty], [IntrNoMem]>; def int_arm_vctp64 : Intrinsic<[llvm_v2i1_ty], [llvm_i32_ty], [IntrNoMem]>; +// v8.3-A Floating-point complex add +def int_arm_neon_vcadd_rot90 : Neon_2Arg_Intrinsic; +def int_arm_neon_vcadd_rot270 : Neon_2Arg_Intrinsic; + // GNU eabi mcount def int_arm_gnu_eabi_mcount : Intrinsic<[], [], diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index ec84c1efbaf14..d619362e029e5 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -757,6 +757,29 @@ defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd, defm FCMLA : SIMDIndexedTiedComplexHSD<1, 0, 1, complexrotateop, "fcmla", null_frag>; +let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { + def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))), + (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 0))>; + def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot270 (v4f16 V64:$Rn), (v4f16 V64:$Rm))), + (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 1))>; + def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot90 (v8f16 V128:$Rn), (v8f16 V128:$Rm))), + (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 0))>; + def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))), + (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>; +} +let Predicates = [HasComplxNum, HasNEON] in { + def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))), + (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>; + def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot270 
(v2f32 V64:$Rn), (v2f32 V64:$Rm))), + (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 1))>; + foreach Ty = [v4f32, v2f64] in { + def : Pat<(Ty (int_aarch64_neon_vcadd_rot90 (Ty V128:$Rn), (Ty V128:$Rm))), + (!cast("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 0))>; + def : Pat<(Ty (int_aarch64_neon_vcadd_rot270 (Ty V128:$Rn), (Ty V128:$Rm))), + (!cast("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 1))>; + } +} + // v8.3a Pointer Authentication // These instructions inhabit part of the hint space and so can be used for // armv8 targets diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index 1653ce1275cf1..0e2b3a08b3ecf 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -5012,6 +5012,27 @@ defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>; defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>; defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>; +let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { + def : Pat<(v4f16 (int_arm_neon_vcadd_rot90 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))), + (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 0))>; + def : Pat<(v4f16 (int_arm_neon_vcadd_rot270 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))), + (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 1))>; + def : Pat<(v8f16 (int_arm_neon_vcadd_rot90 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))), + (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 0))>; + def : Pat<(v8f16 (int_arm_neon_vcadd_rot270 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))), + (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 1))>; +} +let Predicates = [HasNEON,HasV8_3a] in { + def : Pat<(v2f32 (int_arm_neon_vcadd_rot90 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))), + (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 0))>; + def : Pat<(v2f32 (int_arm_neon_vcadd_rot270 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))), + (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 1))>; + def : Pat<(v4f32 (int_arm_neon_vcadd_rot90 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))), + (VCADDv4f32 
(v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 0))>; + def : Pat<(v4f32 (int_arm_neon_vcadd_rot270 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))), + (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 1))>; +} + // Vector Subtract Operations. // VSUB : Vector Subtract (integer and floating-point) diff --git a/llvm/test/CodeGen/AArch64/neon-vcadd.ll b/llvm/test/CodeGen/AArch64/neon-vcadd.ll new file mode 100644 index 0000000000000..11605267c09b4 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/neon-vcadd.ll @@ -0,0 +1,67 @@ +; RUN: llc %s -mtriple=aarch64 -mattr=+v8.3a,+fullfp16 -o - | FileCheck %s + +define <4 x half> @foo16x4_rot(<4 x half> %a, <4 x half> %b) { +entry: +; CHECK-LABEL: foo16x4_rot +; CHECK-DAG: fcadd v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #90 +; CHECK-DAG: fcadd v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #270 + %vcadd_rot90_v2.i = tail call <4 x half> @llvm.aarch64.neon.vcadd.rot90.v4f16(<4 x half> %a, <4 x half> %b) + %vcadd_rot270_v2.i = tail call <4 x half> @llvm.aarch64.neon.vcadd.rot270.v4f16(<4 x half> %a, <4 x half> %b) + %add = fadd <4 x half> %vcadd_rot90_v2.i, %vcadd_rot270_v2.i + ret <4 x half> %add +} + +define <2 x float> @foo32x2_rot(<2 x float> %a, <2 x float> %b) { +entry: +; CHECK-LABEL: foo32x2_rot +; CHECK-DAG: fcadd v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #90 +; CHECK-DAG: fcadd v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #270 + %vcadd_rot90_v2.i = tail call <2 x float> @llvm.aarch64.neon.vcadd.rot90.v2f32(<2 x float> %a, <2 x float> %b) + %vcadd_rot270_v2.i = tail call <2 x float> @llvm.aarch64.neon.vcadd.rot270.v2f32(<2 x float> %a, <2 x float> %b) + %add = fadd <2 x float> %vcadd_rot90_v2.i, %vcadd_rot270_v2.i + ret <2 x float> %add +} + +define <8 x half> @foo16x8_rot(<8 x half> %a, <8 x half> %b) { +entry: +; CHECK-LABEL: foo16x8_rot +; CHECK-DAG: fcadd v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, #90 +; CHECK-DAG: fcadd v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, #270 + %vcaddq_rot90_v2.i = tail call <8 x half> 
@llvm.aarch64.neon.vcadd.rot90.v8f16(<8 x half> %a, <8 x half> %b) + %vcaddq_rot270_v2.i = tail call <8 x half> @llvm.aarch64.neon.vcadd.rot270.v8f16(<8 x half> %a, <8 x half> %b) + %add = fadd <8 x half> %vcaddq_rot90_v2.i, %vcaddq_rot270_v2.i + ret <8 x half> %add +} + +define <4 x float> @foo32x4_rot(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: foo32x4_rot +; CHECK-DAG: fcadd v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, #90 +; CHECK-DAG: fcadd v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, #270 + %vcaddq_rot90_v2.i = tail call <4 x float> @llvm.aarch64.neon.vcadd.rot90.v4f32(<4 x float> %a, <4 x float> %b) + %vcaddq_rot270_v2.i = tail call <4 x float> @llvm.aarch64.neon.vcadd.rot270.v4f32(<4 x float> %a, <4 x float> %b) + %add = fadd <4 x float> %vcaddq_rot90_v2.i, %vcaddq_rot270_v2.i + ret <4 x float> %add +} + +define <2 x double> @foo64x2_rot(<2 x double> %a, <2 x double> %b) { +entry: +; CHECK-LABEL: foo64x2_rot +; CHECK-DAG: fcadd v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, #90 +; CHECK-DAG: fcadd v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, #270 + %vcaddq_rot90_v2.i = tail call <2 x double> @llvm.aarch64.neon.vcadd.rot90.v2f64(<2 x double> %a, <2 x double> %b) + %vcaddq_rot270_v2.i = tail call <2 x double> @llvm.aarch64.neon.vcadd.rot270.v2f64(<2 x double> %a, <2 x double> %b) + %add = fadd <2 x double> %vcaddq_rot90_v2.i, %vcaddq_rot270_v2.i + ret <2 x double> %add +} + +declare <4 x half> @llvm.aarch64.neon.vcadd.rot90.v4f16(<4 x half>, <4 x half>) +declare <4 x half> @llvm.aarch64.neon.vcadd.rot270.v4f16(<4 x half>, <4 x half>) +declare <2 x float> @llvm.aarch64.neon.vcadd.rot90.v2f32(<2 x float>, <2 x float>) +declare <2 x float> @llvm.aarch64.neon.vcadd.rot270.v2f32(<2 x float>, <2 x float>) +declare <8 x half> @llvm.aarch64.neon.vcadd.rot90.v8f16(<8 x half>, <8 x half>) +declare <8 x half> @llvm.aarch64.neon.vcadd.rot270.v8f16(<8 x half>, <8 x half>) +declare <4 x float> @llvm.aarch64.neon.vcadd.rot90.v4f32(<4 x float>, <4 x 
float>) +declare <4 x float> @llvm.aarch64.neon.vcadd.rot270.v4f32(<4 x float>, <4 x float>) +declare <2 x double> @llvm.aarch64.neon.vcadd.rot90.v2f64(<2 x double>, <2 x double>) +declare <2 x double> @llvm.aarch64.neon.vcadd.rot270.v2f64(<2 x double>, <2 x double>) diff --git a/llvm/test/CodeGen/ARM/neon-vcadd.ll b/llvm/test/CodeGen/ARM/neon-vcadd.ll new file mode 100644 index 0000000000000..93a85c8c73c6c --- /dev/null +++ b/llvm/test/CodeGen/ARM/neon-vcadd.ll @@ -0,0 +1,54 @@ +; RUN: llc %s -mtriple=arm -mattr=+armv8.3-a,+fullfp16 -o - | FileCheck %s + +define <4 x half> @foo16x4_rot(<4 x half> %a, <4 x half> %b) { +entry: +; CHECK-LABEL: foo16x4_rot +; CHECK-DAG: vcadd.f16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #90 +; CHECK-DAG: vcadd.f16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #270 + %vcadd_rot90_v2.i = tail call <4 x half> @llvm.arm.neon.vcadd.rot90.v4f16(<4 x half> %a, <4 x half> %b) + %vcadd_rot270_v2.i = tail call <4 x half> @llvm.arm.neon.vcadd.rot270.v4f16(<4 x half> %a, <4 x half> %b) + %add = fadd <4 x half> %vcadd_rot90_v2.i, %vcadd_rot270_v2.i + ret <4 x half> %add +} + +define <2 x float> @foo32x2_rot(<2 x float> %a, <2 x float> %b) { +entry: +; CHECK-LABEL: foo32x2_rot +; CHECK-DAG: vcadd.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #90 +; CHECK-DAG: vcadd.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, #270 + %vcadd_rot90_v2.i = tail call <2 x float> @llvm.arm.neon.vcadd.rot90.v2f32(<2 x float> %a, <2 x float> %b) + %vcadd_rot270_v2.i = tail call <2 x float> @llvm.arm.neon.vcadd.rot270.v2f32(<2 x float> %a, <2 x float> %b) + %add = fadd <2 x float> %vcadd_rot90_v2.i, %vcadd_rot270_v2.i + ret <2 x float> %add +} + +define <8 x half> @foo16x8_rot(<8 x half> %a, <8 x half> %b) { +entry: +; CHECK-LABEL: foo16x8_rot +; CHECK-DAG: vcadd.f16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #90 +; CHECK-DAG: vcadd.f16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #270 + %vcaddq_rot90_v2.i = tail call <8 x half> @llvm.arm.neon.vcadd.rot90.v8f16(<8 x half> %a, <8 x half> %b) + 
%vcaddq_rot270_v2.i = tail call <8 x half> @llvm.arm.neon.vcadd.rot270.v8f16(<8 x half> %a, <8 x half> %b) + %add = fadd <8 x half> %vcaddq_rot90_v2.i, %vcaddq_rot270_v2.i + ret <8 x half> %add +} + +define <4 x float> @foo32x4_rot(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: foo32x4_rot +; CHECK-DAG: vcadd.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #90 +; CHECK-DAG: vcadd.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #270 + %vcaddq_rot90_v2.i = tail call <4 x float> @llvm.arm.neon.vcadd.rot90.v4f32(<4 x float> %a, <4 x float> %b) + %vcaddq_rot270_v2.i = tail call <4 x float> @llvm.arm.neon.vcadd.rot270.v4f32(<4 x float> %a, <4 x float> %b) + %add = fadd <4 x float> %vcaddq_rot90_v2.i, %vcaddq_rot270_v2.i + ret <4 x float> %add +} + +declare <4 x half> @llvm.arm.neon.vcadd.rot90.v4f16(<4 x half>, <4 x half>) +declare <4 x half> @llvm.arm.neon.vcadd.rot270.v4f16(<4 x half>, <4 x half>) +declare <2 x float> @llvm.arm.neon.vcadd.rot90.v2f32(<2 x float>, <2 x float>) +declare <2 x float> @llvm.arm.neon.vcadd.rot270.v2f32(<2 x float>, <2 x float>) +declare <8 x half> @llvm.arm.neon.vcadd.rot90.v8f16(<8 x half>, <8 x half>) +declare <8 x half> @llvm.arm.neon.vcadd.rot270.v8f16(<8 x half>, <8 x half>) +declare <4 x float> @llvm.arm.neon.vcadd.rot90.v4f32(<4 x float>, <4 x float>) +declare <4 x float> @llvm.arm.neon.vcadd.rot270.v4f32(<4 x float>, <4 x float>) From 241cbf201a6f4b7658697e3c76fc6e741d049a01 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Mon, 2 Dec 2019 08:32:59 -0600 Subject: [PATCH 281/591] [PowerPC] Fix crash in peephole optimization When converting reg+reg shifts to reg+imm rotates, we neglect to consider the CodeGenOnly versions of the 32-bit shift mnemonics. This means we produce a rotate with missing operands which causes a crash. Committing this fix without review since it is non-controversial that the list of mnemonics to consider should include the 64-bit aliases for the exact mnemonics. Fixes PR44183. 
--- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 6 ++- llvm/test/CodeGen/PowerPC/pr44183.ll | 56 ++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/pr44183.ll diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index e94ef4b1e505c..f5e2b473f1ee5 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -3757,8 +3757,10 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI, ForwardKilledOperandReg = MI.getOperand(ConstantOpNo).getReg(); unsigned Opc = MI.getOpcode(); - bool SpecialShift32 = - Opc == PPC::SLW || Opc == PPC::SLWo || Opc == PPC::SRW || Opc == PPC::SRWo; + bool SpecialShift32 = Opc == PPC::SLW || Opc == PPC::SLWo || + Opc == PPC::SRW || Opc == PPC::SRWo || + Opc == PPC::SLW8 || Opc == PPC::SLW8o || + Opc == PPC::SRW8 || Opc == PPC::SRW8o; bool SpecialShift64 = Opc == PPC::SLD || Opc == PPC::SLDo || Opc == PPC::SRD || Opc == PPC::SRDo; bool SetCR = Opc == PPC::SLWo || Opc == PPC::SRWo || diff --git a/llvm/test/CodeGen/PowerPC/pr44183.ll b/llvm/test/CodeGen/PowerPC/pr44183.ll new file mode 100644 index 0000000000000..1a6f932bc6d07 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr44183.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | FileCheck %s +%struct.m.2.5.8.11 = type { %struct.l.0.3.6.9, [7 x i8], %struct.a.1.4.7.10 } +%struct.l.0.3.6.9 = type { i8 } +%struct.a.1.4.7.10 = type { [27 x i8], [0 x i32], [4 x i8] } +define void @_ZN1m1nEv(%struct.m.2.5.8.11* %this) local_unnamed_addr nounwind align 2 { +; CHECK-LABEL: _ZN1m1nEv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: mr r30, r3 
+; CHECK-NEXT: ld r4, 8(r30) +; CHECK-NEXT: lwz r5, 36(r30) +; CHECK-NEXT: rldicl r4, r4, 60, 4 +; CHECK-NEXT: rlwinm r3, r4, 31, 0, 0 +; CHECK-NEXT: rlwinm r4, r5, 0, 31, 31 +; CHECK-NEXT: or r4, r4, r3 +; CHECK-NEXT: bl _ZN1llsE1d +; CHECK-NEXT: nop +; CHECK-NEXT: ld r3, 16(r30) +; CHECK-NEXT: ld r4, 8(r30) +; CHECK-NEXT: rldicl r4, r4, 60, 4 +; CHECK-NEXT: sldi r3, r3, 60 +; CHECK-NEXT: or r3, r4, r3 +; CHECK-NEXT: sldi r3, r3, 31 +; CHECK-NEXT: clrldi r4, r3, 32 +; CHECK-NEXT: bl _ZN1llsE1d +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: blr +entry: + %bc = getelementptr inbounds %struct.m.2.5.8.11, %struct.m.2.5.8.11* %this, i64 0, i32 2 + %0 = bitcast %struct.a.1.4.7.10* %bc to i216* + %bf.load = load i216, i216* %0, align 8 + %bf.lshr = lshr i216 %bf.load, 4 + %shl.i23 = shl i216 %bf.lshr, 31 + %shl.i = trunc i216 %shl.i23 to i32 + %arrayidx = getelementptr inbounds %struct.m.2.5.8.11, %struct.m.2.5.8.11* %this, i64 0, i32 2, i32 1, i64 0 + %1 = load i32, i32* %arrayidx, align 4 + %and.i = and i32 %1, 1 + %or.i = or i32 %and.i, %shl.i + tail call void @_ZN1llsE1d(%struct.l.0.3.6.9* undef, i32 %or.i) #1 + %bf.load10 = load i216, i216* %0, align 8 + %bf.lshr11 = lshr i216 %bf.load10, 4 + %shl.i1524 = shl i216 %bf.lshr11, 31 + %shl.i15 = trunc i216 %shl.i1524 to i32 + tail call void @_ZN1llsE1d(%struct.l.0.3.6.9* undef, i32 %shl.i15) #1 + ret void +} +declare void @_ZN1llsE1d(%struct.l.0.3.6.9*, i32) local_unnamed_addr #0 From 0f22e783a038b6983f0fe161eef6cf2add3a4156 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Mon, 2 Dec 2019 17:34:55 +0300 Subject: [PATCH 282/591] [InstCombine] Revert rL341831: relax one-use check in foldICmpAddConstant() (PR44100) rL341831 moved one-use check higher up, restricting a few folds that produced a single instruction from two instructions to the case where the inner instruction would go away. 
Original commit message: > InstCombine: move hasOneUse check to the top of foldICmpAddConstant > > There were two combines not covered by the check before now, > neither of which actually differed from normal in the benefit analysis. > > The most recent seems to be because it was just added at the top of the > function (naturally). The older is from way back in 2008 (r46687) > when we just didn't put those checks in so routinely, and has been > diligently maintained since. From the commit message alone, there doesn't seem to be a deeper motivation, deeper problem that it was trying to solve, other than 'fixing the wrong one-use check'. As I have briefly discussed in IRC with Tim, the original motivation can no longer be recovered, too much time has passed. However I believe that the original fold was doing the right thing, we should be performing such a transformation even if the inner `add` will not go away - that will still unchain the comparison from `add`, it will no longer need to wait for `add` to compute. Doing so doesn't seem to break any particular idioms, at least as far as I can see. 
References https://bugs.llvm.org/show_bug.cgi?id=44100 --- .../InstCombine/InstCombineCompares.cpp | 6 +-- llvm/test/Transforms/InstCombine/icmp-add.ll | 6 +-- .../LoopUnroll/runtime-loop-multiple-exits.ll | 6 +-- .../LoopVectorize/if-conversion-nest.ll | 54 +++++++++---------- .../Transforms/LoopVectorize/runtime-check.ll | 32 +++++------ 5 files changed, 52 insertions(+), 52 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 5fb3ec8757133..071985eb64138 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2566,9 +2566,6 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp, Type *Ty = Add->getType(); CmpInst::Predicate Pred = Cmp.getPredicate(); - if (!Add->hasOneUse()) - return nullptr; - // If the add does not wrap, we can always adjust the compare by subtracting // the constants. Equality comparisons are handled elsewhere. SGE/SLE/UGE/ULE // are canonicalized to SGT/SLT/UGT/ULT. 
@@ -2602,6 +2599,9 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp, return new ICmpInst(ICmpInst::ICMP_UGE, X, ConstantInt::get(Ty, Lower)); } + if (!Add->hasOneUse()) + return nullptr; + // X+C (X & -C2) == C // iff C & (C2-1) == 0 // C2 is a power of 2 diff --git a/llvm/test/Transforms/InstCombine/icmp-add.ll b/llvm/test/Transforms/InstCombine/icmp-add.ll index 1e3875b112c3d..5cf0cfb8d33d8 100644 --- a/llvm/test/Transforms/InstCombine/icmp-add.ll +++ b/llvm/test/Transforms/InstCombine/icmp-add.ll @@ -77,14 +77,14 @@ define i1 @test4(i32 %a) { define { i32, i1 } @test4multiuse(i32 %a) { ; CHECK-LABEL: @test4multiuse( -; CHECK-NEXT: [[B:%.*]] = add i32 [[A:%.*]], -2147483644 -; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[B]], -4 +; CHECK-NEXT: [[B:%.*]] = add nsw i32 [[A:%.*]], -2147483644 +; CHECK-NEXT: [[C:%.*]] = icmp slt i32 [[A]], 2147483640 ; CHECK-NEXT: [[TMP:%.*]] = insertvalue { i32, i1 } undef, i32 [[B]], 0 ; CHECK-NEXT: [[RES:%.*]] = insertvalue { i32, i1 } [[TMP]], i1 [[C]], 1 ; CHECK-NEXT: ret { i32, i1 } [[RES]] ; - %b = add i32 %a, -2147483644 + %b = add nsw i32 %a, -2147483644 %c = icmp slt i32 %b, -4 %tmp = insertvalue { i32, i1 } undef, i32 %b, 0 diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll index 9f97358668b61..e91bd2eff9c0c 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll @@ -578,10 +578,10 @@ define void @test8() { ; PROLOG: %lcmp.mod = icmp eq i64 ; PROLOG-NEXT: br i1 %lcmp.mod, label %innerH.prol.loopexit, label %innerH.prol.preheader ; PROLOG: latch.6: -; PROLOG-NEXT: %tmp4.7 = add nuw nsw i64 %tmp3, 8 ; PROLOG-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch.7 -; PROLOG: latch.7 -; PROLOG-NEXT: %tmp6.7 = icmp ult i64 %tmp4.7, 100 +; PROLOG: latch.7: +; PROLOG-NEXT: %tmp4.7 = add nuw nsw i64 %tmp3, 8 +; PROLOG-NEXT: %tmp6.7 
= icmp ult i64 %tmp3, 92 ; PROLOG-NEXT: br i1 %tmp6.7, label %innerH, label %exit.unr-lcssa bb: br label %outerloop diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll index f254bc81a7c74..c1b29f80cade4 100644 --- a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll +++ b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll @@ -12,7 +12,7 @@ define i32 @foo(i32* nocapture %A, i32* nocapture %B, i32 %n) { ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[N]], -1 @@ -22,33 +22,33 @@ define i32 @foo(i32* nocapture %A, i32* nocapture %B, i32 %n) { ; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[TMP5]] ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP4]], [[A]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[B]] -; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load 
<4 x i32>, <4 x i32>* [[TMP8]], align 4, !alias.scope !0, !noalias !3 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4, !alias.scope !3 -; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD6]] -; CHECK-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP13:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD6]], -; CHECK-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP13]], <4 x i32> , <4 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = and <4 x i1> [[TMP12]], [[TMP11]] -; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], -; CHECK-NEXT: [[TMP17:%.*]] = and <4 x i1> [[TMP11]], [[TMP16]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP15]], <4 x i32> , <4 x i32> -; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP17]], <4 x i32> [[TMP14]], <4 x i32> [[PREDPHI]] -; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[PREDPHI7]], <4 x i32>* [[TMP18]], align 4, !alias.scope !0, !noalias !3 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4, !alias.scope !0, !noalias !3 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4, !alias.scope !3 +; CHECK-NEXT: [[TMP10:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD6]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP12:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD6]], +; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP12]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = and <4 x i1> 
[[TMP11]], [[TMP10]] +; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP11]], +; CHECK-NEXT: [[TMP16:%.*]] = and <4 x i1> [[TMP10]], [[TMP15]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP16]], <4 x i32> [[TMP13]], <4 x i32> [[PREDPHI]] +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[PREDPHI7]], <4 x i32>* [[TMP17]], align 4, !alias.scope !0, !noalias !3 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5 +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5 ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -58,16 +58,16 @@ define i32 @foo(i32* nocapture %A, i32* nocapture %B, i32 %n) { ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END14:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] ; CHECK-NEXT: br i1 [[CMP3]], label [[IF_THEN:%.*]], label [[IF_END14]] ; CHECK: if.then: -; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt 
i32 [[TMP20]], 19 +; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP19]], 19 ; CHECK-NEXT: br i1 [[CMP6]], label [[IF_END14]], label [[IF_ELSE:%.*]] ; CHECK: if.else: -; CHECK-NEXT: [[CMP10:%.*]] = icmp slt i32 [[TMP21]], 4 +; CHECK-NEXT: [[CMP10:%.*]] = icmp slt i32 [[TMP20]], 4 ; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP10]], i32 4, i32 5 ; CHECK-NEXT: br label [[IF_END14]] ; CHECK: if.end14: diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check.ll b/llvm/test/Transforms/LoopVectorize/runtime-check.ll index 3f462425658c0..ecabef22f7255 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check.ll @@ -19,7 +19,7 @@ define i32 @foo(float* nocapture %a, float* nocapture %b, i32 %n) nounwind uwtab ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1, !dbg !9 ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64, !dbg !9 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1, !dbg !9 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4, !dbg !9 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3, !dbg !9 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]], !dbg !9 ; CHECK: vector.memcheck: ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[N]], -1, !dbg !9 @@ -29,34 +29,34 @@ define i32 @foo(float* nocapture %a, float* nocapture %b, i32 %n) nounwind uwtab ; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[B:%.*]], i64 [[TMP5]], !dbg !9 ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt float* [[SCEVGEP4]], [[A]], !dbg !9 ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt float* [[SCEVGEP]], [[B]], !dbg !9 -; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]], !dbg !9 -; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]], !dbg !9 +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]], !dbg !9 +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]], !dbg !9 ; 
CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588, !dbg !9 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]], !dbg !9 ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], !dbg !9 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDEX]], !dbg !9 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[TMP7]] to <4 x float>*, !dbg !9 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP8]], align 4, !dbg !9, !alias.scope !10 -; CHECK-NEXT: [[TMP9:%.*]] = fmul <4 x float> [[WIDE_LOAD]], , !dbg !9 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]], !dbg !9 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP10]] to <4 x float>*, !dbg !9 -; CHECK-NEXT: store <4 x float> [[TMP9]], <4 x float>* [[TMP11]], align 4, !dbg !9, !alias.scope !13, !noalias !10 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDEX]], !dbg !9 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>*, !dbg !9 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP7]], align 4, !dbg !9, !alias.scope !10 +; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x float> [[WIDE_LOAD]], , !dbg !9 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]], !dbg !9 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to <4 x float>*, !dbg !9 +; CHECK-NEXT: store <4 x float> [[TMP8]], <4 x float>* [[TMP10]], align 4, !dbg !9, !alias.scope !13, !noalias !10 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4, !dbg !9 -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]], !dbg !9 -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !dbg !9, !llvm.loop !15 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]], !dbg !9 +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !dbg !9, 
!llvm.loop !15 ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]], !dbg !9 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]], !dbg !9 ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ], !dbg !9 ; CHECK-NEXT: br label [[FOR_BODY:%.*]], !dbg !9 ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], !dbg !9 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]], !dbg !9 -; CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg !9 -; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP13]], 3.000000e+00, !dbg !9 +; CHECK-NEXT: [[TMP12:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg !9 +; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP12]], 3.000000e+00, !dbg !9 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]], !dbg !9 ; CHECK-NEXT: store float [[MUL]], float* [[ARRAYIDX2]], align 4, !dbg !9 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1, !dbg !9 From 3540b80fe445ec467cba5e4cab2a4865bb945639 Mon Sep 17 00:00:00 2001 From: Clement Courbet Date: Mon, 2 Dec 2019 14:58:41 +0100 Subject: [PATCH 283/591] [llvm-exegesis] Fix 44b9942898c7. Summary: Add missing stack release instructions in loadImplicitRegAndFinalize. 
Reviewers: pengfei, gchatelet Subscribers: tschuett, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70903 --- llvm/test/tools/llvm-exegesis/X86/latency-SQRTSSr.s | 13 +++++++++++++ llvm/tools/llvm-exegesis/lib/X86/Target.cpp | 1 + 2 files changed, 14 insertions(+) create mode 100644 llvm/test/tools/llvm-exegesis/X86/latency-SQRTSSr.s diff --git a/llvm/test/tools/llvm-exegesis/X86/latency-SQRTSSr.s b/llvm/test/tools/llvm-exegesis/X86/latency-SQRTSSr.s new file mode 100644 index 0000000000000..1908b9a9e0736 --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/X86/latency-SQRTSSr.s @@ -0,0 +1,13 @@ +# RUN: llvm-exegesis -mode=latency -opcode-name=SQRTSSr -repetition-mode=loop | FileCheck %s + +# Check that the setup code for MXCSR does not crash the snippet. + +CHECK: --- +CHECK-NEXT: mode: latency +CHECK-NEXT: key: +CHECK-NEXT: instructions: +CHECK-NEXT: SQRTSSr +CHECK-NEXT: config: '' +CHECK-NEXT: register_initial_values: +CHECK-NOT: crashed +CHECK-LAST: ... diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp index 08724dd41526e..61da38e5f5dd4 100644 --- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp @@ -513,6 +513,7 @@ ConstantInliner::loadImplicitRegAndFinalize(unsigned Opcode, unsigned Value) { .addReg(0) // IndexReg .addImm(0) // Disp .addReg(0)); // Segment + add(releaseStackSpace(4)); return std::move(Instructions); } From 88bccded8fa169481fa367debf5ec615640635a1 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Fri, 29 Nov 2019 19:37:48 +0100 Subject: [PATCH 284/591] [clangd] Try harder to find a plausible `clang` as argv0, particularly on Mac. 
Summary: Fixes https://github.com/clangd/clangd/issues/211 Fixes https://github.com/clangd/clangd/issues/178 No tests - this is hard to test, and basically impossible to verify what we want (this produces compile commands that work on a real mac with recent toolchain) (Need someone on mac to verify it actually fixes these!) Reviewers: kbobyrev, ilya-biryukov Subscribers: MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D70863 --- .../clangd/GlobalCompilationDatabase.cpp | 150 ++++++++++++++++-- .../clangd/unittests/BackgroundIndexTests.cpp | 4 +- .../GlobalCompilationDatabaseTests.cpp | 5 +- 3 files changed, 142 insertions(+), 17 deletions(-) diff --git a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp index ed3b86f0f55b9..8e78fedf44bb7 100644 --- a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp +++ b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp @@ -18,7 +18,9 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileUtilities.h" #include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" #include #include #include @@ -27,6 +29,113 @@ namespace clang { namespace clangd { namespace { +// Query apple's `xcrun` launcher, which is the source of truth for "how should" +// clang be invoked on this system. +llvm::Optional queryXcrun(llvm::ArrayRef Argv) { + auto Xcrun = llvm::sys::findProgramByName("xcrun"); + if (!Xcrun) { + log("Couldn't find xcrun. 
Hopefully you have a non-apple toolchain..."); + return llvm::None; + } + llvm::SmallString<64> OutFile; + llvm::sys::fs::createTemporaryFile("clangd-xcrun", "", OutFile); + llvm::FileRemover OutRemover(OutFile); + llvm::Optional Redirects[3] = { + /*stdin=*/{""}, /*stdout=*/{OutFile}, /*stderr=*/{""}}; + vlog("Invoking {0} to find clang installation", *Xcrun); + int Ret = llvm::sys::ExecuteAndWait(*Xcrun, Argv, + /*Env=*/llvm::None, Redirects, + /*SecondsToWait=*/10); + if (Ret != 0) { + log("xcrun exists but failed with code {0}. " + "If you have a non-apple toolchain, this is OK. " + "Otherwise, try xcode-select --install.", + Ret); + return llvm::None; + } + + auto Buf = llvm::MemoryBuffer::getFile(OutFile); + if (!Buf) { + log("Can't read xcrun output: {0}", Buf.getError().message()); + return llvm::None; + } + StringRef Path = Buf->get()->getBuffer().trim(); + if (Path.empty()) { + log("xcrun produced no output"); + return llvm::None; + } + return Path.str(); +} + +// On Mac, `which clang` is /usr/bin/clang. It runs `xcrun clang`, which knows +// where the real clang is kept. We need to do the same thing, +// because cc1 (not the driver!) will find libc++ relative to argv[0]. +llvm::Optional queryMacClangPath() { +#ifndef __APPLE__ + return llvm::None; +#endif + + return queryXcrun({"xcrun", "--find", "clang"}); +} + +// Resolve symlinks if possible. +std::string resolve(std::string Path) { + llvm::SmallString<128> Resolved; + if (llvm::sys::fs::real_path(Path, Resolved)) + return Path; // On error; + return Resolved.str(); +} + +// Get a plausible full `clang` path. +// This is used in the fallback compile command, or when the CDB returns a +// generic driver with no path. +llvm::StringRef getFallbackClangPath() { + static const std::string &MemoizedFallbackPath = [&]() -> std::string { + // The driver and/or cc1 sometimes depend on the binary name to compute + // useful things like the standard library location. 
+ // We need to emulate what clang on this system is likely to see. + // cc1 in particular looks at the "real path" of the running process, and + // so if /usr/bin/clang is a symlink, it sees the resolved path. + // clangd doesn't have that luxury, so we resolve symlinks ourselves. + + // /usr/bin/clang on a mac is a program that redirects to the right clang. + // We resolve it as if it were a symlink. + if (auto MacClang = queryMacClangPath()) + return resolve(std::move(*MacClang)); + // On other platforms, just look for compilers on the PATH. + for (const char* Name : {"clang", "gcc", "cc"}) + if (auto PathCC = llvm::sys::findProgramByName(Name)) + return resolve(std::move(*PathCC)); + // Fallback: a nonexistent 'clang' binary next to clangd. + static int Dummy; + std::string ClangdExecutable = + llvm::sys::fs::getMainExecutable("clangd", (void *)&Dummy); + SmallString<128> ClangPath; + ClangPath = llvm::sys::path::parent_path(ClangdExecutable); + llvm::sys::path::append(ClangPath, "clang"); + return ClangPath.str(); + }(); + return MemoizedFallbackPath; +} + +// On mac, /usr/bin/clang sets SDKROOT and then invokes the real clang. +// The effect of this is to set -isysroot correctly. We do the same. +const std::string *getMacSysroot() { +#ifndef __APPLE__ + return nullptr; +#endif + + // SDKROOT overridden in environment, respect it. Driver will set isysroot. + if (::getenv("SDKROOT")) + return nullptr; + static const llvm::Optional &Sysroot = + queryXcrun({"xcrun", "--show-sdk-path"}); + return Sysroot ? Sysroot.getPointer() : nullptr; +} + +// Transform a command into the form we want to send to the driver. +// The command was originally either from the CDB or is the fallback command, +// and may have been modified by OverlayCDB. 
void adjustArguments(tooling::CompileCommand &Cmd, llvm::StringRef ResourceDir) { tooling::ArgumentsAdjuster ArgsAdjuster = tooling::combineAdjusters( @@ -40,10 +149,35 @@ void adjustArguments(tooling::CompileCommand &Cmd, tooling::getClangSyntaxOnlyAdjuster())); Cmd.CommandLine = ArgsAdjuster(Cmd.CommandLine, Cmd.Filename); + // Check whether the flag exists, either as -flag or -flag=* + auto Has = [&](llvm::StringRef Flag) { + for (llvm::StringRef Arg : Cmd.CommandLine) { + if (Arg.consume_front(Flag) && (Arg.empty() || Arg[0] == '=')) + return true; + } + return false; + }; // Inject the resource dir. - // FIXME: Don't overwrite it if it's already there. - if (!ResourceDir.empty()) + if (!ResourceDir.empty() && !Has("-resource-dir")) Cmd.CommandLine.push_back(("-resource-dir=" + ResourceDir).str()); + if (!Has("-isysroot")) + if (const std::string *MacSysroot = getMacSysroot()) { + Cmd.CommandLine.push_back("-isysroot"); + Cmd.CommandLine.push_back(*MacSysroot); + } + + // If the driver is a generic name like "g++" with no path, add a clang path. + // This makes it easier for us to find the standard libraries on mac. 
+ if (!Cmd.CommandLine.empty()) { + std::string &Driver = Cmd.CommandLine.front(); + if (Driver == "clang" || Driver == "clang++" || Driver == "gcc" || + Driver == "g++" || Driver == "cc" || Driver == "c++") { + llvm::SmallString<128> QualifiedDriver = + llvm::sys::path::parent_path(getFallbackClangPath()); + llvm::sys::path::append(QualifiedDriver, Driver); + Driver = QualifiedDriver.str(); + } + } } std::string getStandardResourceDir() { @@ -63,19 +197,9 @@ void actOnAllParentDirectories(PathRef FileName, } // namespace -static std::string getFallbackClangPath() { - static int Dummy; - std::string ClangdExecutable = - llvm::sys::fs::getMainExecutable("clangd", (void *)&Dummy); - SmallString<128> ClangPath; - ClangPath = llvm::sys::path::parent_path(ClangdExecutable); - llvm::sys::path::append(ClangPath, "clang"); - return ClangPath.str(); -} - tooling::CompileCommand GlobalCompilationDatabase::getFallbackCommand(PathRef File) const { - std::vector Argv = {getFallbackClangPath()}; + std::vector Argv = {"clang"}; // Clang treats .h files as C by default and files without extension as linker // input, resulting in unhelpful diagnostics. // Parsing as Objective C++ is friendly to more cases. 
diff --git a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp index c01910e43b40c..01f0ba1b40449 100644 --- a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp +++ b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp @@ -542,7 +542,7 @@ TEST_F(BackgroundIndexTest, CmdLineHash) { FS.Files[testPath("A.h")] = ""; Cmd.Filename = "../A.cc"; Cmd.Directory = testPath("build"); - Cmd.CommandLine = {"clang++", "../A.cc", "-fsyntax-only"}; + Cmd.CommandLine = {"/bin/clang++", "../A.cc", "-fsyntax-only"}; CDB.setCompileCommand(testPath("build/../A.cc"), Cmd); ASSERT_TRUE(Idx.blockUntilIdleForTest()); @@ -558,7 +558,7 @@ TEST_F(BackgroundIndexTest, CmdLineHash) { // FIXME: Changing compile commands should be enough to invalidate the cache. FS.Files[testPath("A.cc")] = " "; - Cmd.CommandLine = {"clang++", "../A.cc", "-Dfoo", "-fsyntax-only"}; + Cmd.CommandLine = {"/bin/clang++", "../A.cc", "-Dfoo", "-fsyntax-only"}; CDB.setCompileCommand(testPath("build/../A.cc"), Cmd); ASSERT_TRUE(Idx.blockUntilIdleForTest()); diff --git a/clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp b/clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp index 6ac363c5933ee..15f628825b134 100644 --- a/clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp +++ b/clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp @@ -105,8 +105,9 @@ TEST_F(OverlayCDBTest, GetCompileCommand) { TEST_F(OverlayCDBTest, GetFallbackCommand) { OverlayCDB CDB(Base.get(), {"-DA=4"}); EXPECT_THAT(CDB.getFallbackCommand(testPath("bar.cc")).CommandLine, - ElementsAre("clang", "-DA=2", testPath("bar.cc"), "-DA=4", - "-fsyntax-only", StartsWith("-resource-dir"))); + ElementsAre(EndsWith("clang"), "-DA=2", testPath("bar.cc"), + "-DA=4", "-fsyntax-only", + StartsWith("-resource-dir"))); } TEST_F(OverlayCDBTest, NoBase) { From ec7436f299f8b2c2e9ad0e2ca66f3cdeb830cf2a Mon Sep 17 
00:00:00 2001 From: Roman Lebedev Date: Mon, 2 Dec 2019 18:28:17 +0300 Subject: [PATCH 285/591] Autogenerate test/Analysis/ValueTracking/non-negative-phi-bits.ll test Forgot to stage this change into 0f22e783a038b6983f0fe161eef6cf2add3a4156 commit. --- llvm/test/Analysis/ValueTracking/non-negative-phi-bits.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/Analysis/ValueTracking/non-negative-phi-bits.ll b/llvm/test/Analysis/ValueTracking/non-negative-phi-bits.ll index 059bbaa3c4e74..3b1c43df5a701 100755 --- a/llvm/test/Analysis/ValueTracking/non-negative-phi-bits.ll +++ b/llvm/test/Analysis/ValueTracking/non-negative-phi-bits.ll @@ -8,7 +8,7 @@ define void @test() #0 { ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], 40 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 39 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void From 96552036e307f7b0dd6477583c3fdb7de17e8aac Mon Sep 17 00:00:00 2001 From: Hideto Ueno Date: Mon, 2 Dec 2019 13:40:09 +0000 Subject: [PATCH 286/591] [Attributor] Copy or port test cases related to Attributor to `Attributor` test folder Summary: This patch moves the test cases related to Attributor to the `Transforms/Attributor` folder. We have used `Transforms/FunctionAttrs` as the primary folder for Attributor tests, but we need to change the testing approach now. For the test cases where I think functionattrs doesn't infer anything (such as willreturn, nosync, value-simplify, h2s, etc.), I moved them with the command `git mv`. For the test cases in which both functionattrs and the attributor are tested, I copied the test to the folder and removed the check only used by functionattrs.
Reviewers: jdoerfert, sstefan1 Reviewed By: jdoerfert Subscribers: jfb, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70843 --- .../{FunctionAttrs => Attributor}/align.ll | 0 .../callbacks.ll | 0 .../dereferenceable-1.ll} | 0 .../Attributor/dereferenceable-2.ll | 356 ++++++++ .../heap_to_stack.ll | 0 .../internal-noalias.ll | 0 .../{FunctionAttrs => Attributor}/liveness.ll | 0 .../{FunctionAttrs => Attributor}/misc.ll | 0 .../new_attributes.ll | 0 .../noalias.ll} | 0 .../test/Transforms/Attributor/nocapture-1.ll | 346 ++++++++ .../nocapture-2.ll} | 0 llvm/test/Transforms/Attributor/nofree.ll | 243 ++++++ llvm/test/Transforms/Attributor/nonnull.ll | 817 ++++++++++++++++++ llvm/test/Transforms/Attributor/norecurse.ll | 147 ++++ .../fn_noreturn.ll => Attributor/noreturn.ll} | 0 .../noreturn_async.ll | 2 +- .../noreturn_sync.ll | 2 +- .../{FunctionAttrs => Attributor}/nosync.ll | 37 - llvm/test/Transforms/Attributor/nounwind.ll | 98 +++ .../read_write_returned_arguments_scc.ll | 0 llvm/test/Transforms/Attributor/readattrs.ll | 145 ++++ llvm/test/Transforms/Attributor/returned.ll | 812 +++++++++++++++++ .../value-simplify.ll | 0 .../willreturn.ll | 71 -- .../Transforms/FunctionAttrs/arg_returned.ll | 158 ---- .../Transforms/FunctionAttrs/nocapture.ll | 72 +- .../FunctionAttrs/nofree-attributor.ll | 69 -- llvm/test/Transforms/FunctionAttrs/nonnull.ll | 259 ++---- .../Transforms/FunctionAttrs/norecurse.ll | 121 +-- .../test/Transforms/FunctionAttrs/nounwind.ll | 13 - .../Transforms/FunctionAttrs/readattrs.ll | 36 +- .../InferFunctionAttrs/dereferenceable.ll | 28 - 33 files changed, 3081 insertions(+), 751 deletions(-) rename llvm/test/Transforms/{FunctionAttrs => Attributor}/align.ll (100%) rename llvm/test/Transforms/{FunctionAttrs => Attributor}/callbacks.ll (100%) rename llvm/test/Transforms/{FunctionAttrs/dereferenceable.ll => Attributor/dereferenceable-1.ll} (100%) create mode 100644 llvm/test/Transforms/Attributor/dereferenceable-2.ll 
rename llvm/test/Transforms/{FunctionAttrs => Attributor}/heap_to_stack.ll (100%) rename llvm/test/Transforms/{FunctionAttrs => Attributor}/internal-noalias.ll (100%) rename llvm/test/Transforms/{FunctionAttrs => Attributor}/liveness.ll (100%) rename llvm/test/Transforms/{FunctionAttrs => Attributor}/misc.ll (100%) rename llvm/test/Transforms/{FunctionAttrs => Attributor}/new_attributes.ll (100%) rename llvm/test/Transforms/{FunctionAttrs/noalias_returned.ll => Attributor/noalias.ll} (100%) create mode 100644 llvm/test/Transforms/Attributor/nocapture-1.ll rename llvm/test/Transforms/{FunctionAttrs/arg_nocapture.ll => Attributor/nocapture-2.ll} (100%) create mode 100644 llvm/test/Transforms/Attributor/nofree.ll create mode 100644 llvm/test/Transforms/Attributor/nonnull.ll create mode 100644 llvm/test/Transforms/Attributor/norecurse.ll rename llvm/test/Transforms/{FunctionAttrs/fn_noreturn.ll => Attributor/noreturn.ll} (100%) rename llvm/test/Transforms/{FunctionAttrs => Attributor}/noreturn_async.ll (96%) rename llvm/test/Transforms/{FunctionAttrs => Attributor}/noreturn_sync.ll (96%) rename llvm/test/Transforms/{FunctionAttrs => Attributor}/nosync.ll (82%) create mode 100644 llvm/test/Transforms/Attributor/nounwind.ll rename llvm/test/Transforms/{FunctionAttrs => Attributor}/read_write_returned_arguments_scc.ll (100%) create mode 100644 llvm/test/Transforms/Attributor/readattrs.ll create mode 100644 llvm/test/Transforms/Attributor/returned.ll rename llvm/test/Transforms/{FunctionAttrs => Attributor}/value-simplify.ll (100%) rename llvm/test/Transforms/{FunctionAttrs => Attributor}/willreturn.ll (78%) diff --git a/llvm/test/Transforms/FunctionAttrs/align.ll b/llvm/test/Transforms/Attributor/align.ll similarity index 100% rename from llvm/test/Transforms/FunctionAttrs/align.ll rename to llvm/test/Transforms/Attributor/align.ll diff --git a/llvm/test/Transforms/FunctionAttrs/callbacks.ll b/llvm/test/Transforms/Attributor/callbacks.ll similarity index 100% rename from 
llvm/test/Transforms/FunctionAttrs/callbacks.ll rename to llvm/test/Transforms/Attributor/callbacks.ll diff --git a/llvm/test/Transforms/FunctionAttrs/dereferenceable.ll b/llvm/test/Transforms/Attributor/dereferenceable-1.ll similarity index 100% rename from llvm/test/Transforms/FunctionAttrs/dereferenceable.ll rename to llvm/test/Transforms/Attributor/dereferenceable-1.ll diff --git a/llvm/test/Transforms/Attributor/dereferenceable-2.ll b/llvm/test/Transforms/Attributor/dereferenceable-2.ll new file mode 100644 index 0000000000000..b3c0440f930f4 --- /dev/null +++ b/llvm/test/Transforms/Attributor/dereferenceable-2.ll @@ -0,0 +1,356 @@ +; RUN: opt < %s -attributor --attributor-disable=false -S | FileCheck %s --check-prefix=ATTRIBUTOR +; Copied from Transforms/InferFunctionAttrs/dereferenceable.ll + +; Determine dereference-ability before unused loads get deleted: +; https://bugs.llvm.org/show_bug.cgi?id=21780 + +define <4 x double> @PR21780(double* %ptr) { +; ATTRIBUTOR-LABEL: @PR21780(double* nocapture nofree nonnull readonly align 8 dereferenceable(32) %ptr) + + ; GEP of index 0 is simplified away. 
+ %arrayidx1 = getelementptr inbounds double, double* %ptr, i64 1 + %arrayidx2 = getelementptr inbounds double, double* %ptr, i64 2 + %arrayidx3 = getelementptr inbounds double, double* %ptr, i64 3 + + %t0 = load double, double* %ptr, align 8 + %t1 = load double, double* %arrayidx1, align 8 + %t2 = load double, double* %arrayidx2, align 8 + %t3 = load double, double* %arrayidx3, align 8 + + %vecinit0 = insertelement <4 x double> undef, double %t0, i32 0 + %vecinit1 = insertelement <4 x double> %vecinit0, double %t1, i32 1 + %vecinit2 = insertelement <4 x double> %vecinit1, double %t2, i32 2 + %vecinit3 = insertelement <4 x double> %vecinit2, double %t3, i32 3 + %shuffle = shufflevector <4 x double> %vecinit3, <4 x double> %vecinit3, <4 x i32> + ret <4 x double> %shuffle +} + + +define double @PR21780_only_access3_with_inbounds(double* %ptr) { +; ATTRIBUTOR-LABEL: @PR21780_only_access3_with_inbounds(double* nocapture nofree nonnull readonly align 8 dereferenceable(32) %ptr) + + %arrayidx3 = getelementptr inbounds double, double* %ptr, i64 3 + %t3 = load double, double* %arrayidx3, align 8 + ret double %t3 +} + +define double @PR21780_only_access3_without_inbounds(double* %ptr) { +; ATTRIBUTOR-LABEL: @PR21780_only_access3_without_inbounds(double* nocapture nofree readonly align 8 %ptr) + %arrayidx3 = getelementptr double, double* %ptr, i64 3 + %t3 = load double, double* %arrayidx3, align 8 + ret double %t3 +} + +define double @PR21780_without_inbounds(double* %ptr) { +; ATTRIBUTOR-LABEL: @PR21780_without_inbounds(double* nocapture nofree nonnull readonly align 8 dereferenceable(32) %ptr) + + %arrayidx1 = getelementptr double, double* %ptr, i64 1 + %arrayidx2 = getelementptr double, double* %ptr, i64 2 + %arrayidx3 = getelementptr double, double* %ptr, i64 3 + + %t0 = load double, double* %ptr, align 8 + %t1 = load double, double* %arrayidx1, align 8 + %t2 = load double, double* %arrayidx2, align 8 + %t3 = load double, double* %arrayidx3, align 8 + + ret double %t3 +} 
+ +; Unsimplified, but still valid. Also, throw in some bogus arguments. + +define void @gep0(i8* %unused, i8* %other, i8* %ptr) { +; ATTRIBUTOR-LABEL: @gep0(i8* nocapture nofree readnone %unused, i8* nocapture nofree nonnull writeonly dereferenceable(1) %other, i8* nocapture nofree nonnull readonly dereferenceable(3) %ptr) + %arrayidx0 = getelementptr i8, i8* %ptr, i64 0 + %arrayidx1 = getelementptr i8, i8* %ptr, i64 1 + %arrayidx2 = getelementptr i8, i8* %ptr, i64 2 + %t0 = load i8, i8* %arrayidx0 + %t1 = load i8, i8* %arrayidx1 + %t2 = load i8, i8* %arrayidx2 + store i8 %t2, i8* %other + ret void +} + +; Order of accesses does not change computation. +; Multiple arguments may be dereferenceable. + +define void @ordering(i8* %ptr1, i32* %ptr2) { +; ATTRIBUTOR-LABEL: @ordering(i8* nocapture nofree nonnull readonly dereferenceable(3) %ptr1, i32* nocapture nofree nonnull readonly dereferenceable(8) %ptr2) + %a20 = getelementptr i32, i32* %ptr2, i64 0 + %a12 = getelementptr i8, i8* %ptr1, i64 2 + %t12 = load i8, i8* %a12 + %a11 = getelementptr i8, i8* %ptr1, i64 1 + %t20 = load i32, i32* %a20 + %a10 = getelementptr i8, i8* %ptr1, i64 0 + %t10 = load i8, i8* %a10 + %t11 = load i8, i8* %a11 + %a21 = getelementptr i32, i32* %ptr2, i64 1 + %t21 = load i32, i32* %a21 + ret void +} + +; Not in entry block. + +define void @not_entry_but_guaranteed_to_execute(i8* %ptr) { +; ATTRIBUTOR-LABEL: @not_entry_but_guaranteed_to_execute(i8* nocapture nofree nonnull readonly dereferenceable(3) %ptr) +entry: + br label %exit +exit: + %arrayidx0 = getelementptr i8, i8* %ptr, i64 0 + %arrayidx1 = getelementptr i8, i8* %ptr, i64 1 + %arrayidx2 = getelementptr i8, i8* %ptr, i64 2 + %t0 = load i8, i8* %arrayidx0 + %t1 = load i8, i8* %arrayidx1 + %t2 = load i8, i8* %arrayidx2 + ret void +} + +; Not in entry block and not guaranteed to execute. 
+ +define void @not_entry_not_guaranteed_to_execute(i8* %ptr, i1 %cond) { +; ATTRIBUTOR-LABEL: @not_entry_not_guaranteed_to_execute(i8* nocapture nofree readonly %ptr, i1 %cond) +entry: + br i1 %cond, label %loads, label %exit +loads: + %arrayidx0 = getelementptr i8, i8* %ptr, i64 0 + %arrayidx1 = getelementptr i8, i8* %ptr, i64 1 + %arrayidx2 = getelementptr i8, i8* %ptr, i64 2 + %t0 = load i8, i8* %arrayidx0 + %t1 = load i8, i8* %arrayidx1 + %t2 = load i8, i8* %arrayidx2 + ret void +exit: + ret void +} + +; The last load may not execute, so derefenceable bytes only covers the 1st two loads. + +define void @partial_in_entry(i16* %ptr, i1 %cond) { +; ATTRIBUTOR-LABEL: @partial_in_entry(i16* nocapture nofree nonnull readonly dereferenceable(4) %ptr, i1 %cond) +entry: + %arrayidx0 = getelementptr i16, i16* %ptr, i64 0 + %arrayidx1 = getelementptr i16, i16* %ptr, i64 1 + %arrayidx2 = getelementptr i16, i16* %ptr, i64 2 + %t0 = load i16, i16* %arrayidx0 + %t1 = load i16, i16* %arrayidx1 + br i1 %cond, label %loads, label %exit +loads: + %t2 = load i16, i16* %arrayidx2 + ret void +exit: + ret void +} + +; The volatile load can't be used to prove a non-volatile access is allowed. +; The 2nd and 3rd loads may never execute. + +define void @volatile_is_not_dereferenceable(i16* %ptr) { +; ATTRIBUTOR-LABEL: @volatile_is_not_dereferenceable(i16* nofree %ptr) + %arrayidx0 = getelementptr i16, i16* %ptr, i64 0 + %arrayidx1 = getelementptr i16, i16* %ptr, i64 1 + %arrayidx2 = getelementptr i16, i16* %ptr, i64 2 + %t0 = load volatile i16, i16* %arrayidx0 + %t1 = load i16, i16* %arrayidx1 + %t2 = load i16, i16* %arrayidx2 + ret void +} + +; TODO: We should allow inference for atomic (but not volatile) ops. 
+ +define void @atomic_is_alright(i16* %ptr) { +; ATTRIBUTOR-LABEL: @atomic_is_alright(i16* nocapture nofree nonnull readonly align 2 dereferenceable(6) %ptr) + %arrayidx0 = getelementptr i16, i16* %ptr, i64 0 + %arrayidx1 = getelementptr i16, i16* %ptr, i64 1 + %arrayidx2 = getelementptr i16, i16* %ptr, i64 2 + %t0 = load atomic i16, i16* %arrayidx0 unordered, align 2 + %t1 = load i16, i16* %arrayidx1 + %t2 = load i16, i16* %arrayidx2 + ret void +} + +declare void @may_not_return() + +define void @not_guaranteed_to_transfer_execution(i16* %ptr) { +; ATTRIBUTOR-LABEL: @not_guaranteed_to_transfer_execution(i16* nocapture nonnull readonly dereferenceable(2) %ptr) + %arrayidx0 = getelementptr i16, i16* %ptr, i64 0 + %arrayidx1 = getelementptr i16, i16* %ptr, i64 1 + %arrayidx2 = getelementptr i16, i16* %ptr, i64 2 + %t0 = load i16, i16* %arrayidx0 + call void @may_not_return() + %t1 = load i16, i16* %arrayidx1 + %t2 = load i16, i16* %arrayidx2 + ret void +} + +; We must have consecutive accesses. + +define void @variable_gep_index(i8* %unused, i8* %ptr, i64 %variable_index) { +; ATTRIBUTOR-LABEL: @variable_gep_index(i8* nocapture nofree readnone %unused, i8* nocapture nofree nonnull readonly dereferenceable(1) %ptr, i64 %variable_index) + %arrayidx1 = getelementptr i8, i8* %ptr, i64 %variable_index + %arrayidx2 = getelementptr i8, i8* %ptr, i64 2 + %t0 = load i8, i8* %ptr + %t1 = load i8, i8* %arrayidx1 + %t2 = load i8, i8* %arrayidx2 + ret void +} + +; Deal with >1 GEP index. + +define void @multi_index_gep(<4 x i8>* %ptr) { +; FIXME: %ptr should be dereferenceable(4) +; ATTRIBUTOR-LABEL: @multi_index_gep(<4 x i8>* nocapture nofree nonnull readonly dereferenceable(1) %ptr) + %arrayidx00 = getelementptr <4 x i8>, <4 x i8>* %ptr, i64 0, i64 0 + %t0 = load i8, i8* %arrayidx00 + ret void +} + +; Could round weird bitwidths down? 
+ +define void @not_byte_multiple(i9* %ptr) { +; ATTRIBUTOR-LABEL: @not_byte_multiple(i9* nocapture nofree nonnull readonly dereferenceable(2) %ptr) + %arrayidx0 = getelementptr i9, i9* %ptr, i64 0 + %t0 = load i9, i9* %arrayidx0 + ret void +} + +; Missing direct access from the pointer. + +define void @no_pointer_deref(i16* %ptr) { +; ATTRIBUTOR-LABEL: @no_pointer_deref(i16* nocapture nofree readonly %ptr) + %arrayidx1 = getelementptr i16, i16* %ptr, i64 1 + %arrayidx2 = getelementptr i16, i16* %ptr, i64 2 + %t1 = load i16, i16* %arrayidx1 + %t2 = load i16, i16* %arrayidx2 + ret void +} + +; Out-of-order is ok, but missing access concludes dereferenceable range. + +define void @non_consecutive(i32* %ptr) { +; ATTRIBUTOR-LABEL: @non_consecutive(i32* nocapture nofree nonnull readonly dereferenceable(8) %ptr) + %arrayidx1 = getelementptr i32, i32* %ptr, i64 1 + %arrayidx0 = getelementptr i32, i32* %ptr, i64 0 + %arrayidx3 = getelementptr i32, i32* %ptr, i64 3 + %t1 = load i32, i32* %arrayidx1 + %t0 = load i32, i32* %arrayidx0 + %t3 = load i32, i32* %arrayidx3 + ret void +} + +; Improve on existing dereferenceable attribute. + +define void @more_bytes(i32* dereferenceable(8) %ptr) { +; ATTRIBUTOR-LABEL: @more_bytes(i32* nocapture nofree nonnull readonly dereferenceable(16) %ptr) + %arrayidx3 = getelementptr i32, i32* %ptr, i64 3 + %arrayidx1 = getelementptr i32, i32* %ptr, i64 1 + %arrayidx0 = getelementptr i32, i32* %ptr, i64 0 + %arrayidx2 = getelementptr i32, i32* %ptr, i64 2 + %t3 = load i32, i32* %arrayidx3 + %t1 = load i32, i32* %arrayidx1 + %t2 = load i32, i32* %arrayidx2 + %t0 = load i32, i32* %arrayidx0 + ret void +} + +; Improve on existing dereferenceable_or_null attribute. 
+ +define void @more_bytes_and_not_null(i32* dereferenceable_or_null(8) %ptr) { +; ATTRIBUTOR-LABEL: @more_bytes_and_not_null(i32* nocapture nofree nonnull readonly dereferenceable(16) %ptr) + %arrayidx3 = getelementptr i32, i32* %ptr, i64 3 + %arrayidx1 = getelementptr i32, i32* %ptr, i64 1 + %arrayidx0 = getelementptr i32, i32* %ptr, i64 0 + %arrayidx2 = getelementptr i32, i32* %ptr, i64 2 + %t3 = load i32, i32* %arrayidx3 + %t1 = load i32, i32* %arrayidx1 + %t2 = load i32, i32* %arrayidx2 + %t0 = load i32, i32* %arrayidx0 + ret void +} + +; But don't pessimize existing dereferenceable attribute. + +define void @better_bytes(i32* dereferenceable(100) %ptr) { +; ATTRIBUTOR-LABEL: @better_bytes(i32* nocapture nofree nonnull readonly dereferenceable(100) %ptr) + %arrayidx3 = getelementptr i32, i32* %ptr, i64 3 + %arrayidx1 = getelementptr i32, i32* %ptr, i64 1 + %arrayidx0 = getelementptr i32, i32* %ptr, i64 0 + %arrayidx2 = getelementptr i32, i32* %ptr, i64 2 + %t3 = load i32, i32* %arrayidx3 + %t1 = load i32, i32* %arrayidx1 + %t2 = load i32, i32* %arrayidx2 + %t0 = load i32, i32* %arrayidx0 + ret void +} + +define void @bitcast(i32* %arg) { +; ATTRIBUTOR-LABEL: @bitcast(i32* nocapture nofree nonnull readonly dereferenceable(8) %arg) + %ptr = bitcast i32* %arg to float* + %arrayidx0 = getelementptr float, float* %ptr, i64 0 + %arrayidx1 = getelementptr float, float* %ptr, i64 1 + %t0 = load float, float* %arrayidx0 + %t1 = load float, float* %arrayidx1 + ret void +} + +define void @bitcast_different_sizes(double* %arg1, i8* %arg2) { +; ATTRIBUTOR-LABEL: @bitcast_different_sizes(double* nocapture nofree nonnull readonly dereferenceable(12) %arg1, i8* nocapture nofree nonnull readonly dereferenceable(16) %arg2) + %ptr1 = bitcast double* %arg1 to float* + %a10 = getelementptr float, float* %ptr1, i64 0 + %a11 = getelementptr float, float* %ptr1, i64 1 + %a12 = getelementptr float, float* %ptr1, i64 2 + %ld10 = load float, float* %a10 + %ld11 = load float, float* %a11 
+ %ld12 = load float, float* %a12 + + %ptr2 = bitcast i8* %arg2 to i64* + %a20 = getelementptr i64, i64* %ptr2, i64 0 + %a21 = getelementptr i64, i64* %ptr2, i64 1 + %ld20 = load i64, i64* %a20 + %ld21 = load i64, i64* %a21 + ret void +} + +define void @negative_offset(i32* %arg) { +; ATTRIBUTOR-LABEL: @negative_offset(i32* nocapture nofree nonnull readonly dereferenceable(4) %arg) + %ptr = bitcast i32* %arg to float* + %arrayidx0 = getelementptr float, float* %ptr, i64 0 + %arrayidx1 = getelementptr float, float* %ptr, i64 -1 + %t0 = load float, float* %arrayidx0 + %t1 = load float, float* %arrayidx1 + ret void +} + +define void @stores(i32* %arg) { +; ATTRIBUTOR-LABEL: @stores(i32* nocapture nofree nonnull writeonly dereferenceable(8) %arg) + %ptr = bitcast i32* %arg to float* + %arrayidx0 = getelementptr float, float* %ptr, i64 0 + %arrayidx1 = getelementptr float, float* %ptr, i64 1 + store float 1.0, float* %arrayidx0 + store float 2.0, float* %arrayidx1 + ret void +} + +define void @load_store(i32* %arg) { +; ATTRIBUTOR-LABEL: @load_store(i32* nocapture nofree nonnull dereferenceable(8) %arg) + %ptr = bitcast i32* %arg to float* + %arrayidx0 = getelementptr float, float* %ptr, i64 0 + %arrayidx1 = getelementptr float, float* %ptr, i64 1 + %t1 = load float, float* %arrayidx0 + store float 2.0, float* %arrayidx1 + ret void +} + +define void @different_size1(i32* %arg) { +; ATTRIBUTOR-LABEL: @different_size1(i32* nocapture nofree nonnull writeonly dereferenceable(8) %arg) + %arg-cast = bitcast i32* %arg to double* + store double 0.000000e+00, double* %arg-cast + store i32 0, i32* %arg + ret void +} + +define void @different_size2(i32* %arg) { +; ATTRIBUTOR-LABEL: @different_size2(i32* nocapture nofree nonnull writeonly dereferenceable(8) %arg) + store i32 0, i32* %arg + %arg-cast = bitcast i32* %arg to double* + store double 0.000000e+00, double* %arg-cast + ret void +} diff --git a/llvm/test/Transforms/FunctionAttrs/heap_to_stack.ll 
b/llvm/test/Transforms/Attributor/heap_to_stack.ll similarity index 100% rename from llvm/test/Transforms/FunctionAttrs/heap_to_stack.ll rename to llvm/test/Transforms/Attributor/heap_to_stack.ll diff --git a/llvm/test/Transforms/FunctionAttrs/internal-noalias.ll b/llvm/test/Transforms/Attributor/internal-noalias.ll similarity index 100% rename from llvm/test/Transforms/FunctionAttrs/internal-noalias.ll rename to llvm/test/Transforms/Attributor/internal-noalias.ll diff --git a/llvm/test/Transforms/FunctionAttrs/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll similarity index 100% rename from llvm/test/Transforms/FunctionAttrs/liveness.ll rename to llvm/test/Transforms/Attributor/liveness.ll diff --git a/llvm/test/Transforms/FunctionAttrs/misc.ll b/llvm/test/Transforms/Attributor/misc.ll similarity index 100% rename from llvm/test/Transforms/FunctionAttrs/misc.ll rename to llvm/test/Transforms/Attributor/misc.ll diff --git a/llvm/test/Transforms/FunctionAttrs/new_attributes.ll b/llvm/test/Transforms/Attributor/new_attributes.ll similarity index 100% rename from llvm/test/Transforms/FunctionAttrs/new_attributes.ll rename to llvm/test/Transforms/Attributor/new_attributes.ll diff --git a/llvm/test/Transforms/FunctionAttrs/noalias_returned.ll b/llvm/test/Transforms/Attributor/noalias.ll similarity index 100% rename from llvm/test/Transforms/FunctionAttrs/noalias_returned.ll rename to llvm/test/Transforms/Attributor/noalias.ll diff --git a/llvm/test/Transforms/Attributor/nocapture-1.ll b/llvm/test/Transforms/Attributor/nocapture-1.ll new file mode 100644 index 0000000000000..abb148d883ed4 --- /dev/null +++ b/llvm/test/Transforms/Attributor/nocapture-1.ll @@ -0,0 +1,346 @@ +; RUN: opt -attributor -attributor-manifest-internal -attributor-disable=false -S -attributor-annotate-decl-cs < %s | FileCheck %s --check-prefixes=ATTRIBUTOR +; RUN: opt -passes=attributor -attributor-manifest-internal -attributor-disable=false -S -attributor-annotate-decl-cs < %s | 
FileCheck %s --check-prefixes=ATTRIBUTOR +; Copied from Transforms/FunctoinAttrs/nocapture.ll + +@g = global i32* null ; [#uses=1] + +; ATTRIBUTOR: define i32* @c1(i32* nofree readnone returned "no-capture-maybe-returned" %q) +define i32* @c1(i32* %q) { + ret i32* %q +} + +; ATTRIBUTOR: define void @c2(i32* nofree writeonly %q) +; It would also be acceptable to mark %q as readnone. Update @c3 too. +define void @c2(i32* %q) { + store i32* %q, i32** @g + ret void +} + +; ATTRIBUTOR: define void @c3(i32* nofree writeonly %q) +define void @c3(i32* %q) { + call void @c2(i32* %q) + ret void +} + +; ATTRIBUTOR: define i1 @c4(i32* nofree readnone %q, i32 %bitno) +define i1 @c4(i32* %q, i32 %bitno) { + %tmp = ptrtoint i32* %q to i32 + %tmp2 = lshr i32 %tmp, %bitno + %bit = trunc i32 %tmp2 to i1 + br i1 %bit, label %l1, label %l0 +l0: + ret i1 0 ; escaping value not caught by def-use chaining. +l1: + ret i1 1 ; escaping value not caught by def-use chaining. +} + +; c4b is c4 but without the escaping part +; ATTRIBUTOR: define i1 @c4b(i32* nocapture nofree readnone %q, i32 %bitno) +define i1 @c4b(i32* %q, i32 %bitno) { + %tmp = ptrtoint i32* %q to i32 + %tmp2 = lshr i32 %tmp, %bitno + %bit = trunc i32 %tmp2 to i1 + br i1 %bit, label %l1, label %l0 +l0: + ret i1 0 ; not escaping! +l1: + ret i1 0 ; not escaping! 
+} + +@lookup_table = global [2 x i1] [ i1 0, i1 1 ] + +; ATTRIBUTOR: define i1 @c5(i32* nofree readonly %q, i32 %bitno) +define i1 @c5(i32* %q, i32 %bitno) { + %tmp = ptrtoint i32* %q to i32 + %tmp2 = lshr i32 %tmp, %bitno + %bit = and i32 %tmp2, 1 + ; subtle escape mechanism follows + %lookup = getelementptr [2 x i1], [2 x i1]* @lookup_table, i32 0, i32 %bit + %val = load i1, i1* %lookup + ret i1 %val +} + +declare void @throw_if_bit_set(i8*, i8) readonly + +; ATTRIBUTOR: define i1 @c6(i8* readonly %q, i8 %bit) +define i1 @c6(i8* %q, i8 %bit) personality i32 (...)* @__gxx_personality_v0 { + invoke void @throw_if_bit_set(i8* %q, i8 %bit) + to label %ret0 unwind label %ret1 +ret0: + ret i1 0 +ret1: + %exn = landingpad {i8*, i32} + cleanup + ret i1 1 +} + +declare i32 @__gxx_personality_v0(...) + +define i1* @lookup_bit(i32* %q, i32 %bitno) readnone nounwind { + %tmp = ptrtoint i32* %q to i32 + %tmp2 = lshr i32 %tmp, %bitno + %bit = and i32 %tmp2, 1 + %lookup = getelementptr [2 x i1], [2 x i1]* @lookup_table, i32 0, i32 %bit + ret i1* %lookup +} + +; ATTRIBUTOR: define i1 @c7(i32* nofree readonly %q, i32 %bitno) +define i1 @c7(i32* %q, i32 %bitno) { + %ptr = call i1* @lookup_bit(i32* %q, i32 %bitno) + %val = load i1, i1* %ptr + ret i1 %val +} + + +; ATTRIBUTOR: define i32 @nc1(i32* nofree %q, i32* nocapture nofree %p, i1 %b) +define i32 @nc1(i32* %q, i32* %p, i1 %b) { +e: + br label %l +l: + %x = phi i32* [ %p, %e ] + %y = phi i32* [ %q, %e ] + %tmp = bitcast i32* %x to i32* ; [#uses=2] + %tmp2 = select i1 %b, i32* %tmp, i32* %y + %val = load i32, i32* %tmp2 ; [#uses=1] + store i32 0, i32* %tmp + store i32* %y, i32** @g + ret i32 %val +} + +; ATTRIBUTOR: define i32 @nc1_addrspace(i32* nofree %q, i32 addrspace(1)* nocapture nofree %p, i1 %b) +define i32 @nc1_addrspace(i32* %q, i32 addrspace(1)* %p, i1 %b) { +e: + br label %l +l: + %x = phi i32 addrspace(1)* [ %p, %e ] + %y = phi i32* [ %q, %e ] + %tmp = addrspacecast i32 addrspace(1)* %x to i32* ; [#uses=2] + %tmp2 = 
select i1 %b, i32* %tmp, i32* %y + %val = load i32, i32* %tmp2 ; [#uses=1] + store i32 0, i32* %tmp + store i32* %y, i32** @g + ret i32 %val +} + +; ATTRIBUTOR: define void @nc2(i32* nocapture nofree %p, i32* nofree %q) +define void @nc2(i32* %p, i32* %q) { + %1 = call i32 @nc1(i32* %q, i32* %p, i1 0) ; [#uses=0] + ret void +} + + +; ATTRIBUTOR: define void @nc3(void ()* nocapture nofree nonnull %p) +define void @nc3(void ()* %p) { + call void %p() + ret void +} + +declare void @external(i8*) readonly nounwind +; ATTRIBUTOR: define void @nc4(i8* nocapture readonly %p) +define void @nc4(i8* %p) { + call void @external(i8* %p) + ret void +} + +; ATTRIBUTOR: define void @nc5(void (i8*)* nocapture nofree nonnull %f, i8* nocapture %p) +define void @nc5(void (i8*)* %f, i8* %p) { + call void %f(i8* %p) readonly nounwind + call void %f(i8* nocapture %p) + ret void +} + +; ATTRIBUTOR: define void @test1_1(i8* nocapture nofree readnone %x1_1, i8* nocapture nofree readnone %y1_1, i1 %c) +; It would be acceptable to add readnone to %y1_1 and %y1_2. 
+define void @test1_1(i8* %x1_1, i8* %y1_1, i1 %c) { + call i8* @test1_2(i8* %x1_1, i8* %y1_1, i1 %c) + store i32* null, i32** @g + ret void +} + +; ATTRIBUTOR: define i8* @test1_2(i8* nocapture nofree readnone %x1_2, i8* nofree readnone returned "no-capture-maybe-returned" %y1_2, i1 %c) +define i8* @test1_2(i8* %x1_2, i8* %y1_2, i1 %c) { + br i1 %c, label %t, label %f +t: + call void @test1_1(i8* %x1_2, i8* %y1_2, i1 %c) + store i32* null, i32** @g + br label %f +f: + ret i8* %y1_2 +} + +; ATTRIBUTOR: define void @test2(i8* nocapture nofree readnone %x2) +define void @test2(i8* %x2) { + call void @test2(i8* %x2) + store i32* null, i32** @g + ret void +} + +; ATTRIBUTOR: define void @test3(i8* nocapture nofree readnone %x3, i8* nocapture nofree readnone %y3, i8* nocapture nofree readnone %z3) +define void @test3(i8* %x3, i8* %y3, i8* %z3) { + call void @test3(i8* %z3, i8* %y3, i8* %x3) + store i32* null, i32** @g + ret void +} + +; ATTRIBUTOR: define void @test4_1(i8* nocapture nofree readnone %x4_1, i1 %c) +define void @test4_1(i8* %x4_1, i1 %c) { + call i8* @test4_2(i8* %x4_1, i8* %x4_1, i8* %x4_1, i1 %c) + store i32* null, i32** @g + ret void +} + +; ATTRIBUTOR: define i8* @test4_2(i8* nocapture nofree readnone %x4_2, i8* nofree readnone returned "no-capture-maybe-returned" %y4_2, i8* nocapture nofree readnone %z4_2, i1 %c) +define i8* @test4_2(i8* %x4_2, i8* %y4_2, i8* %z4_2, i1 %c) { + br i1 %c, label %t, label %f +t: + call void @test4_1(i8* null, i1 %c) + store i32* null, i32** @g + br label %f +f: + ret i8* %y4_2 +} + +declare i8* @test5_1(i8* %x5_1) + +; ATTRIBUTOR: define void @test5_2(i8* %x5_2) +define void @test5_2(i8* %x5_2) { + call i8* @test5_1(i8* %x5_2) + store i32* null, i32** @g + ret void +} + +declare void @test6_1(i8* %x6_1, i8* nocapture %y6_1, ...) + +; ATTRIBUTOR: define void @test6_2(i8* %x6_2, i8* nocapture %y6_2, i8* %z6_2) +define void @test6_2(i8* %x6_2, i8* %y6_2, i8* %z6_2) { + call void (i8*, i8*, ...) 
@test6_1(i8* %x6_2, i8* %y6_2, i8* %z6_2) + store i32* null, i32** @g + ret void +} + +; ATTRIBUTOR: define void @test_cmpxchg(i32* nocapture nofree nonnull dereferenceable(4) %p) +define void @test_cmpxchg(i32* %p) { + cmpxchg i32* %p, i32 0, i32 1 acquire monotonic + ret void +} + +; ATTRIBUTOR: define void @test_cmpxchg_ptr(i32** nocapture nofree nonnull dereferenceable(8) %p, i32* nofree %q) +define void @test_cmpxchg_ptr(i32** %p, i32* %q) { + cmpxchg i32** %p, i32* null, i32* %q acquire monotonic + ret void +} + +; ATTRIBUTOR: define void @test_atomicrmw(i32* nocapture nofree nonnull dereferenceable(4) %p) +define void @test_atomicrmw(i32* %p) { + atomicrmw add i32* %p, i32 1 seq_cst + ret void +} + +; ATTRIBUTOR: define void @test_volatile(i32* nofree align 4 %x) +define void @test_volatile(i32* %x) { +entry: + %gep = getelementptr i32, i32* %x, i64 1 + store volatile i32 0, i32* %gep, align 4 + ret void +} + +; ATTRIBUTOR: nocaptureLaunder(i8* nocapture %p) +define void @nocaptureLaunder(i8* %p) { +entry: + %b = call i8* @llvm.launder.invariant.group.p0i8(i8* %p) + store i8 42, i8* %b + ret void +} + +@g2 = global i8* null +; ATTRIBUTOR: define void @captureLaunder(i8* %p) +define void @captureLaunder(i8* %p) { + %b = call i8* @llvm.launder.invariant.group.p0i8(i8* %p) + store i8* %b, i8** @g2 + ret void +} + +; ATTRIBUTOR: @nocaptureStrip(i8* nocapture writeonly %p) +define void @nocaptureStrip(i8* %p) { +entry: + %b = call i8* @llvm.strip.invariant.group.p0i8(i8* %p) + store i8 42, i8* %b + ret void +} + +@g3 = global i8* null +; ATTRIBUTOR: define void @captureStrip(i8* writeonly %p) +define void @captureStrip(i8* %p) { + %b = call i8* @llvm.strip.invariant.group.p0i8(i8* %p) + store i8* %b, i8** @g3 + ret void +} + +; ATTRIBUTOR: define i1 @captureICmp(i32* nofree readnone %x) +define i1 @captureICmp(i32* %x) { + %1 = icmp eq i32* %x, null + ret i1 %1 +} + +; ATTRIBUTOR: define i1 @captureICmpRev(i32* nofree readnone %x) +define i1 @captureICmpRev(i32* 
%x) {
+  %1 = icmp eq i32* null, %x
+  ret i1 %1
+}
+
+; ATTRIBUTOR: define i1 @nocaptureInboundsGEPICmp(i32* nocapture nofree nonnull readnone %x)
+define i1 @nocaptureInboundsGEPICmp(i32* %x) {
+  %1 = getelementptr inbounds i32, i32* %x, i32 5
+  %2 = bitcast i32* %1 to i8*
+  %3 = icmp eq i8* %2, null
+  ret i1 %3
+}
+
+; ATTRIBUTOR: define i1 @nocaptureInboundsGEPICmpRev(i32* nocapture nofree nonnull readnone %x)
+define i1 @nocaptureInboundsGEPICmpRev(i32* %x) {
+  %1 = getelementptr inbounds i32, i32* %x, i32 5
+  %2 = bitcast i32* %1 to i8*
+  %3 = icmp eq i8* null, %2
+  ret i1 %3
+}
+
+; ATTRIBUTOR: define i1 @nocaptureDereferenceableOrNullICmp(i32* nocapture nofree readnone dereferenceable_or_null(4) %x)
+define i1 @nocaptureDereferenceableOrNullICmp(i32* dereferenceable_or_null(4) %x) {
+  %1 = bitcast i32* %x to i8*
+  %2 = icmp eq i8* %1, null
+  ret i1 %2
+}
+
+; ATTRIBUTOR: define i1 @captureDereferenceableOrNullICmp(i32* nofree readnone dereferenceable_or_null(4) %x)
+define i1 @captureDereferenceableOrNullICmp(i32* dereferenceable_or_null(4) %x) "null-pointer-is-valid"="true" {
+  %1 = bitcast i32* %x to i8*
+  %2 = icmp eq i8* %1, null
+  ret i1 %2
+}
+
+declare void @unknown(i8*)
+define void @test_callsite() {
+entry:
+; We know that 'null' in AS 0 does not alias anything and cannot be captured. Though the latter is not queried -> derived atm.
+; ATTRIBUTOR: call void @unknown(i8* noalias null)
+  call void @unknown(i8* null)
+  ret void
+}
+
+declare i8* @unknownpi8pi8(i8*,i8* returned)
+define i8* @test_returned1(i8* %A, i8* returned %B) nounwind readonly {
+; ATTRIBUTOR: define i8* @test_returned1(i8* nocapture readonly %A, i8* readonly returned %B)
+entry:
+  %p = call i8* @unknownpi8pi8(i8* %A, i8* %B)
+  ret i8* %p
+}
+
+define i8* @test_returned2(i8* %A, i8* %B) {
+; ATTRIBUTOR: define i8* @test_returned2(i8* nocapture readonly %A, i8* readonly returned %B)
+entry:
+  %p = call i8* @unknownpi8pi8(i8* %A, i8* %B) nounwind readonly
+  ret i8* %p
+}
+
+declare i8* @llvm.launder.invariant.group.p0i8(i8*)
+declare i8* @llvm.strip.invariant.group.p0i8(i8*)
diff --git a/llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll b/llvm/test/Transforms/Attributor/nocapture-2.ll
similarity index 100%
rename from llvm/test/Transforms/FunctionAttrs/arg_nocapture.ll
rename to llvm/test/Transforms/Attributor/nocapture-2.ll
diff --git a/llvm/test/Transforms/Attributor/nofree.ll b/llvm/test/Transforms/Attributor/nofree.ll
new file mode 100644
index 0000000000000..d06a0ea1e9b08
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/nofree.ll
@@ -0,0 +1,243 @@
+; RUN: opt -attributor --attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefix=ATTRIBUTOR
+; Copied from Transforms/FunctionAttrs/nofree-attributor.ll
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Test cases specifically designed for the "nofree" function attribute.
+; We use FIXME's to indicate problems and missing attributes.
+ +; Free functions +declare void @free(i8* nocapture) local_unnamed_addr #1 +declare noalias i8* @realloc(i8* nocapture, i64) local_unnamed_addr #0 +declare void @_ZdaPv(i8*) local_unnamed_addr #2 + + +; TEST 1 (positive case) +; ATTRIBUTOR: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; ATTRIBUTOR-NEXT: define void @only_return() +define void @only_return() #0 { + ret void +} + + +; TEST 2 (negative case) +; Only free +; void only_free(char* p) { +; free(p); +; } + +; ATTRIBUTOR: Function Attrs: noinline nounwind uwtable +; ATTRIBUTOR-NOT: nofree +; ATTRIBUTOR-NEXT: define void @only_free(i8* nocapture %0) local_unnamed_addr #1 +define void @only_free(i8* nocapture %0) local_unnamed_addr #0 { + tail call void @free(i8* %0) #1 + ret void +} + + +; TEST 3 (negative case) +; Free occurs in same scc. +; void free_in_scc1(char*p){ +; free_in_scc2(p); +; } +; void free_in_scc2(char*p){ +; free_in_scc1(p); +; free(p); +; } + + +; ATTRIBUTOR: Function Attrs: noinline nounwind uwtable +; ATTRIBUTOR-NOT: nofree +; ATTRIBUTOR-NEXT :define void @free_in_scc1(i8* nocapture %0) local_unnamed_addr +define void @free_in_scc1(i8* nocapture %0) local_unnamed_addr #0 { + tail call void @free_in_scc2(i8* %0) #1 + ret void +} + + +; ATTRIBUTOR: Function Attrs: noinline nounwind uwtable +; ATTRIBUTOR-NOT: nofree +; ATTRIBUTOR: define void @free_in_scc2(i8* nocapture %0) local_unnamed_addr +define void @free_in_scc2(i8* nocapture %0) local_unnamed_addr #0 { + %cmp = icmp eq i8* %0, null + br i1 %cmp, label %rec, label %call +call: + tail call void @free(i8* %0) #1 + br label %end +rec: + tail call void @free_in_scc1(i8* %0) + br label %end +end: + ret void +} + + +; TEST 4 (positive case) +; Free doesn't occur. 
+; void mutual_recursion1(){ +; mutual_recursion2(); +; } +; void mutual_recursion2(){ +; mutual_recursion1(); +; } + + +; ATTRIBUTOR: Function Attrs: nofree noinline noreturn nosync nounwind readnone uwtable +; ATTRIBUTOR-NEXT: define void @mutual_recursion1() +define void @mutual_recursion1() #0 { + call void @mutual_recursion2() + ret void +} + +; ATTRIBUTOR: Function Attrs: nofree noinline noreturn nosync nounwind readnone uwtable +; ATTRIBUTOR-NEXT: define void @mutual_recursion2() +define void @mutual_recursion2() #0 { + call void @mutual_recursion1() + ret void +} + + +; TEST 5 +; C++ delete operation (negative case) +; void delete_op (char p[]){ +; delete [] p; +; } + +; ATTRIBUTOR: Function Attrs: noinline nounwind uwtable +; ATTRIBUTOR-NOT: nofree +; ATTRIBUTOR-NEXT: define void @_Z9delete_opPc(i8* %0) local_unnamed_addr #1 +define void @_Z9delete_opPc(i8* %0) local_unnamed_addr #0 { + %2 = icmp eq i8* %0, null + br i1 %2, label %4, label %3 + +;