diff --git a/llvm/include/llvm/IR/VectorTypeUtils.h b/llvm/include/llvm/IR/VectorTypeUtils.h index d24c714f99cb2..dd9b182682008 100644 --- a/llvm/include/llvm/IR/VectorTypeUtils.h +++ b/llvm/include/llvm/IR/VectorTypeUtils.h @@ -40,6 +40,10 @@ Type *toScalarizedStructTy(StructType *StructTy); /// are vectors of matching element count. This does not include empty structs. bool isVectorizedStructTy(StructType *StructTy); +/// Returns true if `StructTy` is an unpacked literal struct where all elements +/// are scalars that can be used as vector element types. +bool canVectorizeStructTy(StructType *StructTy); + /// A helper for converting to vectorized types. For scalar types, this is /// equivalent to calling `toVectorTy`. For struct types, this returns a new /// struct where each element type has been widened to a vector type. @@ -71,6 +75,18 @@ inline bool isVectorizedTy(Type *Ty) { return Ty->isVectorTy(); } +/// Returns true if `Ty` is a valid vector element type, void, or an unpacked +/// literal struct where all elements are valid vector element types. +/// Note: Even if a type can be vectorized that does not mean it is valid to do +/// so in all cases. For example, a vectorized struct (as returned by +/// toVectorizedTy) does not perform (de)interleaving, so it can't be used for +/// vectorizing loads/stores. +inline bool canVectorizeTy(Type *Ty) { + if (StructType *StructTy = dyn_cast<StructType>(Ty)) + return canVectorizeStructTy(StructTy); + return Ty->isVoidTy() || VectorType::isValidElementType(Ty); +} + /// Returns the types contained in `Ty`. For struct types, it returns the /// elements, all other types are returned directly. 
inline ArrayRef<Type *> getContainedTypes(Type *const &Ty) { diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index fbe80eddbae07..72fda911962ad 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -422,6 +422,10 @@ class LoopVectorizationLegality { /// has a vectorized variant available. bool hasVectorCallVariants() const { return VecCallVariantsFound; } + /// Returns true if there is at least one function call in the loop which + /// returns a struct type and needs to be vectorized. + bool hasStructVectorCall() const { return StructVecCallFound; } + unsigned getNumStores() const { return LAI->getNumStores(); } unsigned getNumLoads() const { return LAI->getNumLoads(); } @@ -644,6 +648,12 @@ class LoopVectorizationLegality { /// the use of those function variants. bool VecCallVariantsFound = false; + /// If we find a call (to be vectorized) that returns a struct type, record + /// that so we can bail out until this is supported. + /// TODO: Remove this flag once vectorizing calls with struct returns is + /// supported. + bool StructVecCallFound = false; + /// Indicates whether this loop has an uncountable early exit, i.e. an /// uncountable exiting block that is not the latch. bool HasUncountableEarlyExit = false; diff --git a/llvm/lib/IR/VectorTypeUtils.cpp b/llvm/lib/IR/VectorTypeUtils.cpp index e6e265414a2b8..62e39aab90079 100644 --- a/llvm/lib/IR/VectorTypeUtils.cpp +++ b/llvm/lib/IR/VectorTypeUtils.cpp @@ -52,3 +52,11 @@ bool llvm::isVectorizedStructTy(StructType *StructTy) { return Ty->isVectorTy() && cast<VectorType>(Ty)->getElementCount() == VF; }); } + +/// Returns true if `StructTy` is an unpacked literal struct where all elements +/// are scalars that can be used as vector element types. 
+bool llvm::canVectorizeStructTy(StructType *StructTy) { + auto ElemTys = StructTy->elements(); + return !ElemTys.empty() && isUnpackedStructLiteral(StructTy) && + all_of(ElemTys, VectorType::isValidElementType); +} diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index cb0b4641b6492..54a244ecd26fe 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -778,6 +778,18 @@ static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) { return Scalarize; } +/// Returns true if the call return type `Ty` can be widened by the loop +/// vectorizer. +static bool canWidenCallReturnType(Type *Ty) { + auto *StructTy = dyn_cast<StructType>(Ty); + // TODO: Remove the homogeneous types restriction. This is just an initial + // simplification. When we want to support things like the overflow intrinsics + // we will have to lift this restriction. + if (StructTy && !StructTy->containsHomogeneousTypes()) + return false; + return canVectorizeTy(StructTy); +} + bool LoopVectorizationLegality::canVectorizeInstrs() { BasicBlock *Header = TheLoop->getHeader(); @@ -942,11 +954,29 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (CI && !VFDatabase::getMappings(*CI).empty()) VecCallVariantsFound = true; + auto CanWidenInstructionTy = [this](Instruction const &Inst) { + Type *InstTy = Inst.getType(); + if (!isa<StructType>(InstTy)) + return canVectorizeTy(InstTy); + + // For now, we only recognize struct values returned from calls where + // all users are extractvalue as vectorizable. All element types of the + // struct must be types that can be widened. + if (isa<CallInst>(Inst) && canWidenCallReturnType(InstTy) && + all_of(Inst.users(), IsaPred<ExtractValueInst>)) { + // TODO: Remove the `StructVecCallFound` flag once vectorizing calls + // with struct returns is supported. 
+ StructVecCallFound = true; + return true; + } + + return false; + }; + // Check that the instruction return type is vectorizable. // We can't vectorize casts from vector type to scalar type. // Also, we can't vectorize extractelement instructions. - if ((!VectorType::isValidElementType(I.getType()) && - !I.getType()->isVoidTy()) || + if (!CanWidenInstructionTy(I) || (isa<CastInst>(I) && !VectorType::isValidElementType(I.getOperand(0)->getType())) || isa<ExtractElementInst>(I)) { diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0797100b182cb..5b0a0e1e4dc55 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -10348,6 +10348,13 @@ bool LoopVectorizePass::processLoop(Loop *L) { return false; } + if (LVL.hasStructVectorCall()) { + reportVectorizationFailure("Auto-vectorization of calls that return struct " + "types is not yet supported", + "StructCallVectorizationUnsupported", ORE, L); + return false; + } + // Entrance to the VPlan-native vectorization path. Outer loops are processed // here. They may require CFG and instruction level transformations before // even evaluating whether vectorization is profitable. Since we cannot modify diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll new file mode 100644 index 0000000000000..77781f95b0858 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll @@ -0,0 +1,97 @@ +; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -pass-remarks-analysis=loop-vectorize 2>%t | FileCheck %s +; RUN: cat %t | FileCheck --check-prefix=CHECK-REMARKS %s + +target triple = "aarch64-unknown-linux-gnu" + +; Tests basic vectorization of scalable homogeneous struct literal returns. + +; TODO: Support vectorization in this case. 
+; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported +define void @struct_return_f32_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @struct_return_f32_widen +; CHECK-NOT: vector.body: +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + %call = tail call { float, float } @foo(float %in_val) #0 + %extract_a = extractvalue { float, float } %call, 0 + %extract_b = extractvalue { float, float } %call, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv + store float %extract_b, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +; TODO: Support vectorization in this case. 
+; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported +define void @struct_return_f64_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @struct_return_f64_widen +; CHECK-NOT: vector.body: +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, ptr %in, i64 %iv + %in_val = load double, ptr %arrayidx, align 8 + %call = tail call { double, double } @bar(double %in_val) #1 + %extract_a = extractvalue { double, double } %call, 0 + %extract_b = extractvalue { double, double } %call, 1 + %arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv + store double %extract_a, ptr %arrayidx2, align 8 + %arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv + store double %extract_b, ptr %arrayidx4, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +; TODO: Support vectorization in this case. 
+; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported +define void @struct_return_f32_widen_rt_checks(ptr %in, ptr writeonly %out_a, ptr writeonly %out_b) { +; CHECK-LABEL: define void @struct_return_f32_widen_rt_checks +; CHECK-NOT: vector.body: +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + %call = tail call { float, float } @foo(float %in_val) #0 + %extract_a = extractvalue { float, float } %call, 0 + %extract_b = extractvalue { float, float } %call, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv + store float %extract_b, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +declare { float, float } @foo(float) +declare { double, double } @bar(double) + +declare { <vscale x 4 x float>, <vscale x 4 x float> } @scalable_vec_masked_foo(<vscale x 4 x float>, <vscale x 4 x i1>) +declare { <vscale x 2 x double>, <vscale x 2 x double> } @scalable_vec_masked_bar(<vscale x 2 x double>, <vscale x 2 x i1>) + + +attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVsMxv_foo(scalable_vec_masked_foo)" } +attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVsMxv_bar(scalable_vec_masked_bar)" } diff --git a/llvm/test/Transforms/LoopVectorize/struct-return.ll b/llvm/test/Transforms/LoopVectorize/struct-return.ll new file mode 100644 index 0000000000000..9f98e8af2e98c --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/struct-return.ll @@ -0,0 +1,406 @@ +; RUN: opt < %s -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S -pass-remarks-analysis=loop-vectorize 2>%t | FileCheck %s +; RUN: cat %t | FileCheck --check-prefix=CHECK-REMARKS %s + +target datalayout = 
"e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + +; Tests basic vectorization of homogeneous struct literal returns. + +; TODO: Support vectorization in this case. +; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported +define void @struct_return_f32_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @struct_return_f32_widen +; CHECK-NOT: vector.body: +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + %call = tail call { float, float } @foo(float %in_val) #0 + %extract_a = extractvalue { float, float } %call, 0 + %extract_b = extractvalue { float, float } %call, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv + store float %extract_b, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +; TODO: Support vectorization in this case. 
+; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported +define void @struct_return_f64_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @struct_return_f64_widen +; CHECK-NOT: vector.body: +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, ptr %in, i64 %iv + %in_val = load double, ptr %arrayidx, align 8 + %call = tail call { double, double } @bar(double %in_val) #1 + %extract_a = extractvalue { double, double } %call, 0 + %extract_b = extractvalue { double, double } %call, 1 + %arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv + store double %extract_a, ptr %arrayidx2, align 8 + %arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv + store double %extract_b, ptr %arrayidx4, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +; TODO: Support vectorization in this case. 
+; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported +define void @struct_return_f32_replicate(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @struct_return_f32_replicate +; CHECK-NOT: vector.body: +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + ; #3 does not have a fixed-size vector mapping (so replication is used) + %call = tail call { float, float } @foo(float %in_val) #3 + %extract_a = extractvalue { float, float } %call, 0 + %extract_b = extractvalue { float, float } %call, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv + store float %extract_b, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +; TODO: Support vectorization in this case. 
+; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported +define void @struct_return_f32_widen_rt_checks(ptr %in, ptr writeonly %out_a, ptr writeonly %out_b) { +; CHECK-LABEL: define void @struct_return_f32_widen_rt_checks +; CHECK-NOT: vector.body: +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + %call = tail call { float, float } @foo(float %in_val) #0 + %extract_a = extractvalue { float, float } %call, 0 + %extract_b = extractvalue { float, float } %call, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv + store float %extract_b, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +; TODO: Allow mixed-struct type vectorization and mark overflow intrinsics as trivially vectorizable. 
+; CHECK-REMARKS: remark: {{.*}} loop not vectorized: call instruction cannot be vectorized +define void @test_overflow_intrinsic(ptr noalias readonly %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @test_overflow_intrinsic +; CHECK-NOT: vector.body: +; CHECK-NOT: @llvm.sadd.with.overflow.v{{.+}}i32 +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load i32, ptr %arrayidx, align 4 + %call = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %in_val, i32 %in_val) + %extract_ret = extractvalue { i32, i1 } %call, 0 + %extract_overflow = extractvalue { i32, i1 } %call, 1 + %zext_overflow = zext i1 %extract_overflow to i8 + %arrayidx2 = getelementptr inbounds i32, ptr %out_a, i64 %iv + store i32 %extract_ret, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds i8, ptr %out_b, i64 %iv + store i8 %zext_overflow, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +; TODO: Support vectorization in this case. 
+; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported +define void @struct_return_i32_three_results_widen(ptr noalias %in, ptr noalias writeonly %out_a) { +; CHECK-LABEL: define void @struct_return_i32_three_results_widen +; CHECK-NOT: vector.body: +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %in, i64 %iv + %in_val = load i32, ptr %arrayidx, align 4 + %call = tail call { i32, i32, i32 } @qux(i32 %in_val) #5 + %extract_a = extractvalue { i32, i32, i32 } %call, 0 + %arrayidx2 = getelementptr inbounds i32, ptr %out_a, i64 %iv + store i32 %extract_a, ptr %arrayidx2, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +; Negative test. Widening structs of vectors is not supported. +; CHECK-REMARKS-COUNT: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized +define void @negative_struct_of_vectors(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @negative_struct_of_vectors +; CHECK-NOT: vector.body: +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load <1 x float>, ptr %arrayidx, align 4 + %call = tail call { <1 x float>, <1 x float> } @foo(<1 x float> %in_val) #0 + %extract_a = extractvalue { <1 x float>, <1 x float> } %call, 0 + %extract_b = extractvalue { <1 x float>, <1 x float> } %call, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store <1 x float> %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv + store <1 x float> %extract_b, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = 
icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +; Negative test. Widening structs with mixed element types is not supported. +; CHECK-REMARKS-COUNT: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized +define void @negative_mixed_element_type_struct_return(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @negative_mixed_element_type_struct_return +; CHECK-NOT: vector.body: +; CHECK-NOT: call {{.*}} @fixed_vec_baz +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + %call = tail call { float, i32 } @baz(float %in_val) #2 + %extract_a = extractvalue { float, i32 } %call, 0 + %extract_b = extractvalue { float, i32 } %call, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds i32, ptr %out_b, i64 %iv + store i32 %extract_b, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +%named_struct = type { double, double } + +; Negative test. Widening non-literal structs is not supported. 
+; CHECK-REMARKS-COUNT: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized +define void @negative_named_struct_return(ptr noalias readonly %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @negative_named_struct_return +; CHECK-NOT: vector.body: +; CHECK-NOT: call {{.*}} @fixed_vec_bar +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, ptr %in, i64 %iv + %in_val = load double, ptr %arrayidx, align 8 + %call = tail call %named_struct @bar_named(double %in_val) #4 + %extract_a = extractvalue %named_struct %call, 0 + %extract_b = extractvalue %named_struct %call, 1 + %arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv + store double %extract_a, ptr %arrayidx2, align 8 + %arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv + store double %extract_b, ptr %arrayidx4, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +; Negative test. Nested homogeneous structs are not supported. 
+; CHECK-REMARKS-COUNT: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized +define void @negative_nested_struct(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @negative_nested_struct +; CHECK-NOT: vector.body: +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + %call = tail call { { float, float } } @foo_nested_struct(float %in_val) #0 + %extract_inner = extractvalue { { float, float } } %call, 0 + %extract_a = extractvalue { float, float } %extract_inner, 0 + %extract_b = extractvalue { float, float } %extract_inner, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv + store float %extract_b, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +; Negative test. The second element of the struct cannot be widened. 
+; CHECK-REMARKS-COUNT: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized +define void @negative_non_widenable_element(ptr noalias %in, ptr noalias writeonly %out_a) { +; CHECK-LABEL: define void @negative_non_widenable_element +; CHECK-NOT: vector.body: +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + %call = tail call { float, [1 x float] } @foo_one_non_widenable_element(float %in_val) #0 + %extract_a = extractvalue { float, [1 x float] } %call, 0 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +; Negative test. Homogeneous structs of arrays are not supported. +; CHECK-REMARKS-COUNT: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized +define void @negative_struct_array_elements(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @negative_struct_array_elements +; CHECK-NOT: vector.body: +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + %call = tail call { [2 x float] } @foo_arrays(float %in_val) #0 + %extract_inner = extractvalue { [2 x float] } %call, 0 + %extract_a = extractvalue [2 x float] %extract_inner, 0 + %extract_b = extractvalue [2 x float] %extract_inner, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv + store float %extract_b, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw 
i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +; Negative test. Widening struct loads is not supported. +; CHECK-REMARKS: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized +define void @negative_struct_load(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) { +; CHECK-LABEL: define void @negative_struct_load +; CHECK-NOT: vector.body: +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds { float, float }, ptr %in, i64 %iv + %call = load { float, float }, ptr %arrayidx, align 8 + %extract_a = extractvalue { float, float } %call, 0 + %extract_b = extractvalue { float, float } %call, 1 + %arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv + store float %extract_a, ptr %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv + store float %extract_b, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +; Negative test. Widening struct stores is not supported. 
+; CHECK-REMARKS: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized +define void @negative_struct_return_store_struct(ptr noalias %in, ptr noalias writeonly %out) { +; CHECK-LABEL: define void @negative_struct_return_store_struct +; CHECK-NOT: vector.body: +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds { float, float }, ptr %in, i64 %iv + %in_val = load float, ptr %arrayidx, align 4 + %call = tail call { float, float } @foo(float %in_val) #0 + %out_ptr = getelementptr inbounds { float, float }, ptr %out, i64 %iv + store { float, float } %call, ptr %out_ptr, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} + +declare { float, float } @foo(float) +declare { double, double } @bar(double) +declare { float, i32 } @baz(float) +declare %named_struct @bar_named(double) +declare { { float, float } } @foo_nested_struct(float) +declare { [2 x float] } @foo_arrays(float) +declare { float, [1 x float] } @foo_one_non_widenable_element(float) +declare { <1 x float>, <1 x float> } @foo_vectors(<1 x float>) +declare { i32, i32, i32 } @qux(i32) + +declare { <2 x float>, <2 x float> } @fixed_vec_foo(<2 x float>) +declare { <2 x double>, <2 x double> } @fixed_vec_bar(<2 x double>) +declare { <2 x float>, <2 x i32> } @fixed_vec_baz(<2 x float>) +declare { <2 x i32>, <2 x i32>, <2 x i32> } @fixed_vec_qux(<2 x i32>) + +declare { <vscale x 4 x float>, <vscale x 4 x float> } @scalable_vec_masked_foo(<vscale x 4 x float>, <vscale x 4 x i1>) + +attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_foo(fixed_vec_foo)" } +attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_bar(fixed_vec_bar)" } +attributes #2 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_baz(fixed_vec_baz)" } +attributes #3 = { nounwind "vector-function-abi-variant"="_ZGVsMxv_foo(scalable_vec_masked_foo)" } +attributes #4 = { nounwind 
"vector-function-abi-variant"="_ZGVnN2v_bar_named(fixed_vec_bar)" } +attributes #5 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_qux(fixed_vec_qux)" }