From 9a5042f54f4b1a256b5e3724137b09aa95e8f5ff Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Mon, 30 Sep 2024 10:11:38 -0400 Subject: [PATCH 1/9] [Scalarizer] A change to let the scalarizer pass be able to scalarize structs --- llvm/include/llvm/IR/IntrinsicsDirectX.td | 3 + .../DirectX/DirectXTargetTransformInfo.cpp | 77 ++++++++++--------- llvm/lib/Transforms/Scalar/Scalarizer.cpp | 69 ++++++++++++++++- llvm/test/CodeGen/DirectX/split-double.ll | 10 +++ 4 files changed, 119 insertions(+), 40 deletions(-) create mode 100644 llvm/test/CodeGen/DirectX/split-double.ll diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index 27a437a83be6d..d8dd2beca4899 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -89,5 +89,8 @@ def int_dx_wave_is_first_lane : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrCon def int_dx_wave_readlane : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; def int_dx_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>; def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>; +def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>], + [LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>; + def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; } diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp index 8ea31401121bc..231afd8ae3eea 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp @@ -1,38 +1,39 @@ -//===- DirectXTargetTransformInfo.cpp - DirectX TTI ---------------*- C++ -//-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -//===----------------------------------------------------------------------===// - -#include "DirectXTargetTransformInfo.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IntrinsicsDirectX.h" - -using namespace llvm; - -bool DirectXTTIImpl::isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, - unsigned ScalarOpdIdx) { - switch (ID) { - case Intrinsic::dx_wave_readlane: - return ScalarOpdIdx == 1; - default: - return false; - } -} - -bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable( - Intrinsic::ID ID) const { - switch (ID) { - case Intrinsic::dx_frac: - case Intrinsic::dx_rsqrt: - case Intrinsic::dx_wave_readlane: - return true; - default: - return false; - } -} +//===- DirectXTargetTransformInfo.cpp - DirectX TTI ---------------*- C++ +//-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +//===----------------------------------------------------------------------===// + +#include "DirectXTargetTransformInfo.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsDirectX.h" + +using namespace llvm; + +bool DirectXTTIImpl::isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, + unsigned ScalarOpdIdx) { + switch (ID) { + case Intrinsic::dx_wave_readlane: + return ScalarOpdIdx == 1; + default: + return false; + } +} + +bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable( + Intrinsic::ID ID) const { + switch (ID) { + case Intrinsic::dx_frac: + case Intrinsic::dx_rsqrt: + case Intrinsic::dx_wave_readlane: + case Intrinsic::dx_splitdouble: + return true; + default: + return false; + } +} diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp index b1e4c7e52d99a..5a7253915695b 100644 --- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -197,6 +197,11 @@ struct VectorLayout { uint64_t SplitSize = 0; }; +static bool isStructOfVectors(Type *Ty) { + return isa(Ty) && Ty->getNumContainedTypes() > 0 && + isa(Ty->getContainedType(0)); +} + /// Concatenate the given fragments to a single vector value of the type /// described in @p VS. static Value *concatenate(IRBuilder<> &Builder, ArrayRef Fragments, @@ -276,6 +281,7 @@ class ScalarizerVisitor : public InstVisitor { bool visitBitCastInst(BitCastInst &BCI); bool visitInsertElementInst(InsertElementInst &IEI); bool visitExtractElementInst(ExtractElementInst &EEI); + bool visitExtractValueInst(ExtractValueInst &EVI); bool visitShuffleVectorInst(ShuffleVectorInst &SVI); bool visitPHINode(PHINode &PHI); bool visitLoadInst(LoadInst &LI); @@ -552,7 +558,10 @@ void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op, // Determine how Ty is split, if at all. std::optional ScalarizerVisitor::getVectorSplit(Type *Ty) { VectorSplit Split; - Split.VecTy = dyn_cast(Ty); + if (isStructOfVectors(Ty)) + Split.VecTy = cast(Ty->getContainedType(0)); + else + Split.VecTy = dyn_cast(Ty); if (!Split.VecTy) return {}; @@ -1030,6 +1039,33 @@ bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) { return true; } +bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) { + Value *Op = EVI.getOperand(0); + Type *OpTy = Op->getType(); + ValueVector Res; + if (!isStructOfVectors(OpTy)) + return false; + // Note: isStructOfVectors is also used in getVectorSplit. + // The intent is to bail on this visit if it isn't a struct + // of vectors. Downside is that when it is true we do two + // isStructOfVectors calls. + std::optional VS = getVectorSplit(OpTy); + if (!VS) + return false; + Scatterer Op0 = scatter(&EVI, Op, *VS); + assert(!EVI.getIndices().empty() && "Make sure an index exists"); + // Note for our use case we only care about the top level index. + unsigned Index = EVI.getIndices()[0]; + for (unsigned OpIdx = 0; OpIdx < Op0.size(); ++OpIdx) { + Value *ResElem = Builder.CreateExtractValue( + Op0[OpIdx], Index, EVI.getName() + ".elem" + std::to_string(Index)); + Res.push_back(ResElem); + } + // replaceUses(&EVI, Res); + gather(&EVI, Res, *VS); + return true; +} + bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) { std::optional VS = getVectorSplit(EEI.getOperand(0)->getType()); if (!VS) @@ -1196,7 +1232,7 @@ bool ScalarizerVisitor::finish() { if (!Op->use_empty()) { // The value is still needed, so recreate it using a series of // insertelements and/or shufflevectors. - Value *Res; + Value *Res = nullptr; if (auto *Ty = dyn_cast(Op->getType())) { BasicBlock *BB = Op->getParent(); IRBuilder<> Builder(Op); @@ -1209,6 +1245,35 @@ bool ScalarizerVisitor::finish() { Res = concatenate(Builder, CV, VS, Op->getName()); Res->takeName(Op); + } else if (auto *Ty = dyn_cast(Op->getType())) { + BasicBlock *BB = Op->getParent(); + IRBuilder<> Builder(Op); + if (isa(Op)) + Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); + + // Iterate over each element in the struct + uint NumOfStructElements = Ty->getNumElements(); + SmallVector ElemCV(NumOfStructElements); + for (unsigned I = 0; I < NumOfStructElements; ++I) { + for (auto *CVelem : CV) { + Value *Elem = Builder.CreateExtractValue( + CVelem, I, Op->getName() + ".elem" + std::to_string(I)); + ElemCV[I].push_back(Elem); + } + } + Res = PoisonValue::get(Ty); + for (unsigned I = 0; I < NumOfStructElements; ++I) { + Type *ElemTy = Ty->getElementType(I); + assert(isa(ElemTy) && + "Only Structs of all FixedVectorType supported"); + VectorSplit VS = *getVectorSplit(ElemTy); + assert(VS.NumFragments == CV.size()); + + Value *ConcatenatedVector = + concatenate(Builder, ElemCV[I], VS, Op->getName()); + Res = Builder.CreateInsertValue(Res, ConcatenatedVector, I, + Op->getName() + ".insert"); + } } else { assert(CV.size() == 1 && Op->getType() == CV[0]->getType()); Res = CV[0]; diff --git a/llvm/test/CodeGen/DirectX/split-double.ll b/llvm/test/CodeGen/DirectX/split-double.ll new file mode 100644 index 0000000000000..7d3c28efbc63c --- /dev/null +++ b/llvm/test/CodeGen/DirectX/split-double.ll @@ -0,0 +1,10 @@ + +; RUN: opt -S -scalarizer -scalarize-load-store -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s + +define noundef <3 x i32> @test_vector_double_split(<3 x double> noundef %d) local_unnamed_addr { + %hlsl.asuint = call { <3 x i32>, <3 x i32> } @llvm.dx.splitdouble.v3i32(<3 x double> %d) + %1 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 0 + %2 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 1 + %3 = add <3 x i32> %1, %2 + ret <3 x i32> %3 +} From da2e3c16aaea1168ba0324258fbc359a82e1d0a3 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Tue, 8 Oct 2024 00:59:15 -0400 Subject: [PATCH 2/9] Add support for frexp. Move vector look up to just callInst and extractValue instruction visits --- llvm/include/llvm/IR/IntrinsicsDirectX.td | 1 - llvm/lib/Transforms/Scalar/Scalarizer.cpp | 53 ++++++++++++------ llvm/test/CodeGen/DirectX/split-double.ll | 36 ++++++++++--- llvm/test/Transforms/Scalarizer/frexp.ll | 66 +++++++++++++++++++++++ 4 files changed, 133 insertions(+), 23 deletions(-) create mode 100644 llvm/test/Transforms/Scalarizer/frexp.ll diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index d8dd2beca4899..e30d37f69f781 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -91,6 +91,5 @@ def int_dx_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32 def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>; def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>], [LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>; - def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; } diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp index 5a7253915695b..f91d7ba755ac6 100644 --- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -197,9 +197,15 @@ struct VectorLayout { uint64_t SplitSize = 0; }; -static bool isStructOfVectors(Type *Ty) { - return isa(Ty) && Ty->getNumContainedTypes() > 0 && - isa(Ty->getContainedType(0)); +static bool isStructAllVectors(Type *Ty) { + if (!isa(Ty)) + return false; + + for(unsigned I = 0; I < Ty->getNumContainedTypes(); I++) + if (!isa(Ty->getContainedType(I))) + return false; + + return true; } /// Concatenate the given fragments to a single vector value of the type @@ -558,10 +564,7 @@ void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op, // Determine how Ty is split, if at all. std::optional ScalarizerVisitor::getVectorSplit(Type *Ty) { VectorSplit Split; - if (isStructOfVectors(Ty)) - Split.VecTy = cast(Ty->getContainedType(0)); - else - Split.VecTy = dyn_cast(Ty); + Split.VecTy = dyn_cast(Ty); if (!Split.VecTy) return {}; @@ -676,6 +679,10 @@ bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) { bool ScalarizerVisitor::isTriviallyScalarizable(Intrinsic::ID ID) { if (isTriviallyVectorizable(ID)) return true; + switch (ID) { + case Intrinsic::frexp: + return true; + } return Intrinsic::isTargetIntrinsic(ID) && TTI->isTargetIntrinsicTriviallyScalarizable(ID); } @@ -683,7 +690,13 @@ bool ScalarizerVisitor::isTriviallyScalarizable(Intrinsic::ID ID) { /// If a call to a vector typed intrinsic function, split into a scalar call per /// element if possible for the intrinsic. bool ScalarizerVisitor::splitCall(CallInst &CI) { - std::optional VS = getVectorSplit(CI.getType()); + Type* CallType = CI.getType(); + bool areAllVectors = isStructAllVectors(CallType); + std::optional VS; + if (areAllVectors) + VS = getVectorSplit(CallType->getContainedType(0)); + else + VS = getVectorSplit(CallType); if (!VS) return false; @@ -708,6 +721,18 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) { if (isVectorIntrinsicWithOverloadTypeAtArg(ID, -1)) Tys.push_back(VS->SplitTy); + if(areAllVectors) { + Type* PrevType = CallType->getContainedType(0); + Type* CallType = CI.getType(); + for(unsigned I = 1; I < CallType->getNumContainedTypes(); I++) { + Type* CurrType = cast(CallType->getContainedType(I)); + if(PrevType != CurrType) { + std::optional CurrVS = getVectorSplit(CurrType); + Tys.push_back(CurrVS->SplitTy); + PrevType = CurrType; + } + } + } // Assumes that any vector type has the same number of elements as the return // vector type, which is true for all current intrinsics. for (unsigned I = 0; I != NumArgs; ++I) { @@ -1043,15 +1068,13 @@ bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) { Value *Op = EVI.getOperand(0); Type *OpTy = Op->getType(); ValueVector Res; - if (!isStructOfVectors(OpTy)) + if (!isStructAllVectors(OpTy)) return false; - // Note: isStructOfVectors is also used in getVectorSplit. - // The intent is to bail on this visit if it isn't a struct - // of vectors. Downside is that when it is true we do two - // isStructOfVectors calls. - std::optional VS = getVectorSplit(OpTy); + Type* VecType = cast(OpTy->getContainedType(0)); + std::optional VS = getVectorSplit(VecType); if (!VS) return false; + IRBuilder<> Builder(&EVI); Scatterer Op0 = scatter(&EVI, Op, *VS); assert(!EVI.getIndices().empty() && "Make sure an index exists"); // Note for our use case we only care about the top level index. @@ -1252,7 +1275,7 @@ bool ScalarizerVisitor::finish() { Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); // Iterate over each element in the struct - uint NumOfStructElements = Ty->getNumElements(); + unsigned NumOfStructElements = Ty->getNumElements(); SmallVector ElemCV(NumOfStructElements); for (unsigned I = 0; I < NumOfStructElements; ++I) { for (auto *CVelem : CV) { diff --git a/llvm/test/CodeGen/DirectX/split-double.ll b/llvm/test/CodeGen/DirectX/split-double.ll index 7d3c28efbc63c..4fc5fdd1922a2 100644 --- a/llvm/test/CodeGen/DirectX/split-double.ll +++ b/llvm/test/CodeGen/DirectX/split-double.ll @@ -1,10 +1,32 @@ +; RUN: opt -S -scalarizer -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s -; RUN: opt -S -scalarizer -scalarize-load-store -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +define void @test_vector_double_split_void(<3 x double> noundef %d) { + %hlsl.asuint = call { <3 x i32>, <3 x i32> } @llvm.dx.splitdouble.v3i32(<3 x double> %d) + ret void +} -define noundef <3 x i32> @test_vector_double_split(<3 x double> noundef %d) local_unnamed_addr { - %hlsl.asuint = call { <3 x i32>, <3 x i32> } @llvm.dx.splitdouble.v3i32(<3 x double> %d) - %1 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 0 - %2 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 1 - %3 = add <3 x i32> %1, %2 - ret <3 x i32> %3 +define noundef <3 x i32> @test_vector_double_split(<3 x double> noundef %d) { + ; CHECK: [[ee0:%.*]] = extractelement <3 x double> %d, i64 0 + ; CHECK: [[ie0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <3 x double> %d, i64 1 + ; CHECK: [[ie1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[ee1]]) + ; CHECK: [[ee2:%.*]] = extractelement <3 x double> %d, i64 2 + ; CHECK: [[ie2:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[ee2]]) + ; CHECK: [[ev00:%.*]] = extractvalue { i32, i32 } [[ie0]], 0 + ; CHECK: [[ev01:%.*]] = extractvalue { i32, i32 } [[ie1]], 0 + ; CHECK: [[ev02:%.*]] = extractvalue { i32, i32 } [[ie2]], 0 + ; CHECK: [[ev10:%.*]] = extractvalue { i32, i32 } [[ie0]], 1 + ; CHECK: [[ev11:%.*]] = extractvalue { i32, i32 } [[ie1]], 1 + ; CHECK: [[ev12:%.*]] = extractvalue { i32, i32 } [[ie2]], 1 + ; CHECK: [[add1:%.*]] = add i32 [[ev00]], [[ev10]] + ; CHECK: [[add2:%.*]] = add i32 [[ev01]], [[ev11]] + ; CHECK: [[add3:%.*]] = add i32 [[ev02]], [[ev12]] + ; CHECK: insertelement <3 x i32> poison, i32 [[add1]], i64 0 + ; CHECK: insertelement <3 x i32> %{{.*}}, i32 [[add2]], i64 1 + ; CHECK: insertelement <3 x i32> %{{.*}}, i32 [[add3]], i64 2 + %hlsl.asuint = call { <3 x i32>, <3 x i32> } @llvm.dx.splitdouble.v3i32(<3 x double> %d) + %1 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 0 + %2 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 1 + %3 = add <3 x i32> %1, %2 + ret <3 x i32> %3 } diff --git a/llvm/test/Transforms/Scalarizer/frexp.ll b/llvm/test/Transforms/Scalarizer/frexp.ll new file mode 100644 index 0000000000000..454042e6887c3 --- /dev/null +++ b/llvm/test/Transforms/Scalarizer/frexp.ll @@ -0,0 +1,66 @@ +; RUN: opt %s -passes='function(scalarizer)' -S | FileCheck %s + +; CHECK-LABEL: @test_vector_half_frexp_half +define noundef <2 x half> @test_vector_half_frexp_half(<2 x half> noundef %h) { + ; CHECK: [[ee0:%.*]] = extractelement <2 x half> %h, i64 0 + ; CHECK-NEXT: [[ie0:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[ee0]]) + ; CHECK-NEXT: [[ee1:%.*]] = extractelement <2 x half> %h, i64 1 + ; CHECK-NEXT: [[ie1:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[ee1]]) + ; CHECK-NEXT: [[ev00:%.*]] = extractvalue { half, i32 } [[ie0]], 0 + ; CHECK-NEXT: [[ev01:%.*]] = extractvalue { half, i32 } [[ie1]], 0 + ; CHECK-NEXT: insertelement <2 x half> poison, half [[ev00]], i64 0 + ; CHECK-NEXT: insertelement <2 x half> %{{.*}}, half [[ev01]], i64 1 + %r = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x half> %h) + %e0 = extractvalue { <2 x half>, <2 x i32> } %r, 0 + ret <2 x half> %e0 +} + +; CHECK-LABEL: @test_vector_half_frexp_int +define noundef <2 x i32> @test_vector_half_frexp_int(<2 x half> noundef %h) { + ; CHECK: [[ee0:%.*]] = extractelement <2 x half> %h, i64 0 + ; CHECK-NEXT: [[ie0:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[ee0]]) + ; CHECK-NEXT: [[ee1:%.*]] = extractelement <2 x half> %h, i64 1 + ; CHECK-NEXT: [[ie1:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[ee1]]) + ; CHECK-NEXT: [[ev10:%.*]] = extractvalue { half, i32 } [[ie0]], 1 + ; CHECK-NEXT: [[ev11:%.*]] = extractvalue { half, i32 } [[ie1]], 1 + ; CHECK-NEXT: insertelement <2 x i32> poison, i32 [[ev10]], i64 0 + ; CHECK-NEXT: insertelement <2 x i32> %{{.*}}, i32 [[ev11]], i64 1 + %r = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x half> %h) + %e1 = extractvalue { <2 x half>, <2 x i32> } %r, 1 + ret <2 x i32> %e1 +} + + +define noundef <2 x float> @test_vector_float_frexp_int(<2 x float> noundef %f) { + ; CHECK: [[ee0:%.*]] = extractelement <2 x float> %f, i64 0 + ; CHECK-NEXT: [[ie0:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[ee0]]) + ; CHECK-NEXT: [[ee1:%.*]] = extractelement <2 x float> %f, i64 1 + ; CHECK-NEXT: [[ie1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[ee1]]) + ; CHECK-NEXT: [[ev00:%.*]] = extractvalue { float, i32 } [[ie0]], 0 + ; CHECK-NEXT: [[ev01:%.*]] = extractvalue { float, i32 } [[ie1]], 0 + ; CHECK-NEXT: insertelement <2 x float> poison, float [[ev00]], i64 0 + ; CHECK-NEXT: insertelement <2 x float> %{{.*}}, float [[ev01]], i64 1 + ; CHECK-NEXT: extractvalue { float, i32 } [[ie0]], 1 + ; CHECK-NEXT: extractvalue { float, i32 } [[ie1]], 1 + %1 = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x float> %f) + %2 = extractvalue { <2 x float>, <2 x i32> } %1, 0 + %3 = extractvalue { <2 x float>, <2 x i32> } %1, 1 + ret <2 x float> %2 +} + +define noundef <2 x double> @test_vector_double_frexp_int(<2 x double> noundef %d) { + ; CHECK: [[ee0:%.*]] = extractelement <2 x double> %d, i64 0 + ; CHECK-NEXT: [[ie0:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[ee0]]) + ; CHECK-NEXT: [[ee1:%.*]] = extractelement <2 x double> %d, i64 1 + ; CHECK-NEXT: [[ie1:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[ee1]]) + ; CHECK-NEXT: [[ev00:%.*]] = extractvalue { double, i32 } [[ie0]], 0 + ; CHECK-NEXT: [[ev01:%.*]] = extractvalue { double, i32 } [[ie1]], 0 + ; CHECK-NEXT: insertelement <2 x double> poison, double [[ev00]], i64 0 + ; CHECK-NEXT: insertelement <2 x double> %{{.*}}, double [[ev01]], i64 1 + ; CHECK-NEXT: extractvalue { double, i32 } [[ie0]], 1 + ; CHECK-NEXT: extractvalue { double, i32 } [[ie1]], 1 + %1 = call { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double> %d) + %2 = extractvalue { <2 x double>, <2 x i32> } %1, 0 + %3 = extractvalue { <2 x double>, <2 x i32> } %1, 1 + ret <2 x double> %2 +} From c5383f3efca4577a8a0197b17a2d8087f9e06f9f Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Tue, 8 Oct 2024 03:44:57 -0400 Subject: [PATCH 3/9] fix up in prep for PR. --- llvm/lib/Transforms/Scalar/Scalarizer.cpp | 41 +++++++++++++---------- llvm/test/CodeGen/DirectX/split-double.ll | 14 ++++++-- llvm/test/Transforms/Scalarizer/frexp.ll | 3 +- 3 files changed, 37 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp index f91d7ba755ac6..f66f13dba6358 100644 --- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -200,11 +200,17 @@ struct VectorLayout { static bool isStructAllVectors(Type *Ty) { if (!isa(Ty)) return false; - - for(unsigned I = 0; I < Ty->getNumContainedTypes(); I++) - if (!isa(Ty->getContainedType(I))) + if (Ty->getNumContainedTypes() < 1) + return false; + FixedVectorType *VecTy = dyn_cast(Ty->getContainedType(0)); + if (!VecTy) + return false; + unsigned VecSize = VecTy->getNumElements(); + for (unsigned I = 1; I < Ty->getNumContainedTypes(); I++) { + VecTy = dyn_cast(Ty->getContainedType(I)); + if (!VecTy || VecSize != VecTy->getNumElements()) return false; - + } return true; } @@ -679,8 +685,9 @@ bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) { bool ScalarizerVisitor::isTriviallyScalarizable(Intrinsic::ID ID) { if (isTriviallyVectorizable(ID)) return true; + // TODO: investigate vectorizable frexp switch (ID) { - case Intrinsic::frexp: + case Intrinsic::frexp: return true; } return Intrinsic::isTargetIntrinsic(ID) && @@ -690,10 +697,10 @@ bool ScalarizerVisitor::isTriviallyScalarizable(Intrinsic::ID ID) { /// If a call to a vector typed intrinsic function, split into a scalar call per /// element if possible for the intrinsic. bool ScalarizerVisitor::splitCall(CallInst &CI) { - Type* CallType = CI.getType(); - bool areAllVectors = isStructAllVectors(CallType); - std::optional VS; - if (areAllVectors) + Type *CallType = CI.getType(); + bool AreAllVectors = isStructAllVectors(CallType); + std::optional VS; + if (AreAllVectors) VS = getVectorSplit(CallType->getContainedType(0)); else VS = getVectorSplit(CallType); @@ -721,12 +728,12 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) { if (isVectorIntrinsicWithOverloadTypeAtArg(ID, -1)) Tys.push_back(VS->SplitTy); - if(areAllVectors) { - Type* PrevType = CallType->getContainedType(0); - Type* CallType = CI.getType(); - for(unsigned I = 1; I < CallType->getNumContainedTypes(); I++) { - Type* CurrType = cast(CallType->getContainedType(I)); - if(PrevType != CurrType) { + if (AreAllVectors) { + Type *PrevType = CallType->getContainedType(0); + Type *CallType = CI.getType(); + for (unsigned I = 1; I < CallType->getNumContainedTypes(); I++) { + Type *CurrType = cast(CallType->getContainedType(I)); + if (PrevType != CurrType) { std::optional CurrVS = getVectorSplit(CurrType); Tys.push_back(CurrVS->SplitTy); PrevType = CurrType; @@ -1070,7 +1077,7 @@ bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) { ValueVector Res; if (!isStructAllVectors(OpTy)) return false; - Type* VecType = cast(OpTy->getContainedType(0)); + Type *VecType = cast(OpTy->getContainedType(0)); std::optional VS = getVectorSplit(VecType); if (!VS) return false; @@ -1084,7 +1091,7 @@ bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) { Op0[OpIdx], Index, EVI.getName() + ".elem" + std::to_string(Index)); Res.push_back(ResElem); } - // replaceUses(&EVI, Res); + gather(&EVI, Res, *VS); return true; } diff --git a/llvm/test/CodeGen/DirectX/split-double.ll b/llvm/test/CodeGen/DirectX/split-double.ll index 4fc5fdd1922a2..9b70e87ba4794 100644 --- a/llvm/test/CodeGen/DirectX/split-double.ll +++ b/llvm/test/CodeGen/DirectX/split-double.ll @@ -1,10 +1,18 @@ -; RUN: opt -S -scalarizer -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; RUN: opt -passes='function(scalarizer)' -S -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s -define void @test_vector_double_split_void(<3 x double> noundef %d) { - %hlsl.asuint = call { <3 x i32>, <3 x i32> } @llvm.dx.splitdouble.v3i32(<3 x double> %d) +; CHECK-LABEL: @test_vector_double_split_void +define void @test_vector_double_split_void(<2 x double> noundef %d) { + ; CHECK: [[ee0:%.*]] = extractelement <2 x double> %d, i64 0 + ; CHECK: [[ie0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[ee0]]) + ; CHECK: [[ee1:%.*]] = extractelement <2 x double> %d, i64 1 + ; CHECK: [[ie1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[ee1]]) + ; CHECK-NOT: extractvalue { i32, i32 } {{.*}}, 0 + ; CHECK-NOT: insertelement <2 x i32> {{.*}}, i32 {{.*}}, i64 0 + %hlsl.asuint = call { <2 x i32>, <2 x i32> } @llvm.dx.splitdouble.v2i32(<2 x double> %d) ret void } +; CHECK-LABEL: @test_vector_double_split define noundef <3 x i32> @test_vector_double_split(<3 x double> noundef %d) { ; CHECK: [[ee0:%.*]] = extractelement <3 x double> %d, i64 0 ; CHECK: [[ie0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[ee0]]) diff --git a/llvm/test/Transforms/Scalarizer/frexp.ll b/llvm/test/Transforms/Scalarizer/frexp.ll index 454042e6887c3..48159b45c1896 100644 --- a/llvm/test/Transforms/Scalarizer/frexp.ll +++ b/llvm/test/Transforms/Scalarizer/frexp.ll @@ -30,7 +30,7 @@ define noundef <2 x i32> @test_vector_half_frexp_int(<2 x half> noundef %h) { ret <2 x i32> %e1 } - +; CHECK-LABEL: @test_vector_float_frexp_int define noundef <2 x float> @test_vector_float_frexp_int(<2 x float> noundef %f) { ; CHECK: [[ee0:%.*]] = extractelement <2 x float> %f, i64 0 ; CHECK-NEXT: [[ie0:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[ee0]]) @@ -48,6 +48,7 @@ define noundef <2 x float> @test_vector_float_frexp_int(<2 x float> noundef %f) ret <2 x float> %2 } +; CHECK-LABEL: @test_vector_double_frexp_int define noundef <2 x double> @test_vector_double_frexp_int(<2 x double> noundef %d) { ; CHECK: [[ee0:%.*]] = extractelement <2 x double> %d, i64 0 ; CHECK-NEXT: [[ie0:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[ee0]]) From 8f7ed3beec93d26576c8d06f501f319863e9b4e5 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Tue, 15 Oct 2024 18:06:49 -0400 Subject: [PATCH 4/9] address pr comments --- llvm/lib/Transforms/Scalar/Scalarizer.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp index f66f13dba6358..44e2122f10b4b 100644 --- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -197,16 +197,17 @@ struct VectorLayout { uint64_t SplitSize = 0; }; -static bool isStructAllVectors(Type *Ty) { +static bool isStructOfMatchingFixedVectors(Type *Ty) { if (!isa(Ty)) return false; - if (Ty->getNumContainedTypes() < 1) + unsigned StructSize = Ty->getNumContainedTypes(); + if (StructSize < 1) return false; FixedVectorType *VecTy = dyn_cast(Ty->getContainedType(0)); if (!VecTy) return false; unsigned VecSize = VecTy->getNumElements(); - for (unsigned I = 1; I < Ty->getNumContainedTypes(); I++) { + for (unsigned I = 1; I < StructSize; I++) { VecTy = dyn_cast(Ty->getContainedType(I)); if (!VecTy || VecSize != VecTy->getNumElements()) return false; @@ -685,7 +686,8 @@ bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) { bool ScalarizerVisitor::isTriviallyScalarizable(Intrinsic::ID ID) { if (isTriviallyVectorizable(ID)) return true; - // TODO: investigate vectorizable frexp + // TODO: Move frexp to isTriviallyVectorizable. + // https://github.com/llvm/llvm-project/issues/112408 switch (ID) { case Intrinsic::frexp: return true; @@ -698,7 +700,7 @@ bool ScalarizerVisitor::isTriviallyScalarizable(Intrinsic::ID ID) { /// element if possible for the intrinsic. bool ScalarizerVisitor::splitCall(CallInst &CI) { Type *CallType = CI.getType(); - bool AreAllVectors = isStructAllVectors(CallType); + bool AreAllVectors = isStructOfMatchingFixedVectors(CallType); std::optional VS; if (AreAllVectors) VS = getVectorSplit(CallType->getContainedType(0)); @@ -730,7 +732,6 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) { if (AreAllVectors) { Type *PrevType = CallType->getContainedType(0); - Type *CallType = CI.getType(); for (unsigned I = 1; I < CallType->getNumContainedTypes(); I++) { Type *CurrType = cast(CallType->getContainedType(I)); if (PrevType != CurrType) { @@ -1075,7 +1076,7 @@ bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) { Value *Op = EVI.getOperand(0); Type *OpTy = Op->getType(); ValueVector Res; - if (!isStructAllVectors(OpTy)) + if (!isStructOfMatchingFixedVectors(OpTy)) return false; Type *VecType = cast(OpTy->getContainedType(0)); std::optional VS = getVectorSplit(VecType); @@ -1262,7 +1263,7 @@ bool ScalarizerVisitor::finish() { if (!Op->use_empty()) { // The value is still needed, so recreate it using a series of // insertelements and/or shufflevectors. - Value *Res = nullptr; + Value *Res; if (auto *Ty = dyn_cast(Op->getType())) { BasicBlock *BB = Op->getParent(); IRBuilder<> Builder(Op); @@ -1287,7 +1288,7 @@ bool ScalarizerVisitor::finish() { for (unsigned I = 0; I < NumOfStructElements; ++I) { for (auto *CVelem : CV) { Value *Elem = Builder.CreateExtractValue( - CVelem, I, Op->getName() + ".elem" + std::to_string(I)); + CVelem, I, Op->getName() + ".elem" + Twine(I)); ElemCV[I].push_back(Elem); } } From dfa4963316ba4542190f2b0e8548defea4abd6c6 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Wed, 16 Oct 2024 12:44:01 -0400 Subject: [PATCH 5/9] add safety check in case `getVectorSplit` fails. --- llvm/lib/Transforms/Scalar/Scalarizer.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp index 44e2122f10b4b..539c1568c46e7 100644 --- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -700,9 +700,9 @@ bool ScalarizerVisitor::isTriviallyScalarizable(Intrinsic::ID ID) { /// element if possible for the intrinsic. bool ScalarizerVisitor::splitCall(CallInst &CI) { Type *CallType = CI.getType(); - bool AreAllVectors = isStructOfMatchingFixedVectors(CallType); + bool AreAllMatchingVectors = isStructOfMatchingFixedVectors(CallType); std::optional VS; - if (AreAllVectors) + if (AreAllMatchingVectors) VS = getVectorSplit(CallType->getContainedType(0)); else VS = getVectorSplit(CallType); @@ -730,12 +730,17 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) { if (isVectorIntrinsicWithOverloadTypeAtArg(ID, -1)) Tys.push_back(VS->SplitTy); - if (AreAllVectors) { + if (AreAllMatchingVectors) { Type *PrevType = CallType->getContainedType(0); for (unsigned I = 1; I < CallType->getNumContainedTypes(); I++) { Type *CurrType = cast(CallType->getContainedType(I)); if (PrevType != CurrType) { std::optional CurrVS = getVectorSplit(CurrType); + // This case does not seem to happen, but it is possible for + // VectorSplit.NumPacked >= NumElems. If that happens a VectorSplit + // is not returned and we will bailout of handling this call. + if (!CurrVS) + return false; Tys.push_back(CurrVS->SplitTy); PrevType = CurrType; } From 5e8cb8bab1fc01ffd0950a40db36e7880d2b11b8 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Thu, 17 Oct 2024 14:36:38 -0400 Subject: [PATCH 6/9] address pr comments --- llvm/lib/Transforms/Scalar/Scalarizer.cpp | 2 +- llvm/test/CodeGen/DirectX/split-double.ll | 59 ++++++------ llvm/test/Transforms/Scalarizer/frexp.ll | 108 ++++++++++++++-------- 3 files changed, 100 insertions(+), 69 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp index 539c1568c46e7..f708eeeb29d31 100644 --- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -1094,7 +1094,7 @@ bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) { unsigned Index = EVI.getIndices()[0]; for (unsigned OpIdx = 0; OpIdx < Op0.size(); ++OpIdx) { Value *ResElem = Builder.CreateExtractValue( - Op0[OpIdx], Index, EVI.getName() + ".elem" + std::to_string(Index)); + Op0[OpIdx], Index, EVI.getName() + ".elem" + Twine(Index)); Res.push_back(ResElem); } diff --git a/llvm/test/CodeGen/DirectX/split-double.ll b/llvm/test/CodeGen/DirectX/split-double.ll index 9b70e87ba4794..759590fa56279 100644 --- a/llvm/test/CodeGen/DirectX/split-double.ll +++ b/llvm/test/CodeGen/DirectX/split-double.ll @@ -1,37 +1,42 @@ -; RUN: opt -passes='function(scalarizer)' -S -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes='function(scalarizer)' -S -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s -; CHECK-LABEL: @test_vector_double_split_void define void @test_vector_double_split_void(<2 x double> noundef %d) { - ; CHECK: [[ee0:%.*]] = extractelement <2 x double> %d, i64 0 - ; CHECK: [[ie0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[ee0]]) - ; CHECK: [[ee1:%.*]] = extractelement <2 x double> %d, i64 1 - ; CHECK: [[ie1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[ee1]]) - ; CHECK-NOT: extractvalue { i32, i32 } {{.*}}, 0 - ; CHECK-NOT: insertelement <2 x i32> {{.*}}, i32 {{.*}}, i64 0 +; CHECK-LABEL: define void @test_vector_double_split_void( +; CHECK-SAME: <2 x double> noundef [[D:%.*]]) { +; CHECK-NEXT: [[D_I0:%.*]] = extractelement <2 x double> [[D]], i64 0 +; CHECK-NEXT: [[HLSL_ASUINT_I0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I0]]) +; CHECK-NEXT: [[D_I1:%.*]] = extractelement <2 x double> [[D]], i64 1 +; CHECK-NEXT: [[HLSL_ASUINT_I1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I1]]) +; CHECK-NEXT: ret void +; %hlsl.asuint = call { <2 x i32>, <2 x i32> } @llvm.dx.splitdouble.v2i32(<2 x double> %d) ret void } -; CHECK-LABEL: @test_vector_double_split define noundef <3 x i32> @test_vector_double_split(<3 x double> noundef %d) { - ; CHECK: [[ee0:%.*]] = extractelement <3 x double> %d, i64 0 - ; CHECK: [[ie0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[ee0]]) - ; CHECK: [[ee1:%.*]] = extractelement <3 x double> %d, i64 1 - ; CHECK: [[ie1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[ee1]]) - ; CHECK: [[ee2:%.*]] = extractelement <3 x double> %d, i64 2 - ; CHECK: [[ie2:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[ee2]]) - ; CHECK: [[ev00:%.*]] = extractvalue { i32, i32 } [[ie0]], 0 - ; CHECK: [[ev01:%.*]] = extractvalue { i32, i32 } [[ie1]], 0 - ; CHECK: [[ev02:%.*]] = extractvalue { i32, i32 } [[ie2]], 0 - ; CHECK: [[ev10:%.*]] = extractvalue { i32, i32 } [[ie0]], 1 - ; CHECK: [[ev11:%.*]] = extractvalue { i32, i32 } [[ie1]], 1 - ; CHECK: [[ev12:%.*]] = extractvalue { i32, i32 } [[ie2]], 1 - ; CHECK: [[add1:%.*]] = add i32 [[ev00]], [[ev10]] - ; CHECK: [[add2:%.*]] = add i32 [[ev01]], [[ev11]] - ; CHECK: [[add3:%.*]] = add i32 [[ev02]], [[ev12]] - ; CHECK: insertelement <3 x i32> poison, i32 [[add1]], i64 0 - ; CHECK: insertelement <3 x i32> %{{.*}}, i32 [[add2]], i64 1 - ; CHECK: insertelement <3 x i32> %{{.*}}, i32 [[add3]], i64 2 +; CHECK-LABEL: define noundef <3 x i32> @test_vector_double_split( +; CHECK-SAME: <3 x double> noundef [[D:%.*]]) { +; CHECK-NEXT: [[D_I0:%.*]] = extractelement <3 x double> [[D]], i64 0 +; CHECK-NEXT: [[HLSL_ASUINT_I0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I0]]) +; CHECK-NEXT: [[D_I1:%.*]] = extractelement <3 x double> [[D]], i64 1 +; CHECK-NEXT: [[HLSL_ASUINT_I1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I1]]) +; CHECK-NEXT: [[D_I2:%.*]] = extractelement <3 x double> [[D]], i64 2 +; CHECK-NEXT: [[HLSL_ASUINT_I2:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I2]]) +; CHECK-NEXT: [[DOTELEM0:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 0 +; CHECK-NEXT: [[DOTELEM01:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I1]], 0 +; CHECK-NEXT: [[DOTELEM02:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I2]], 0 +; CHECK-NEXT: [[DOTELEM1:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 1 +; CHECK-NEXT: [[DOTELEM13:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I1]], 1 +; CHECK-NEXT: [[DOTELEM14:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I2]], 1 +; CHECK-NEXT: [[DOTI0:%.*]] = add i32 [[DOTELEM0]], [[DOTELEM1]] +; CHECK-NEXT: [[DOTI1:%.*]] = add i32 [[DOTELEM01]], [[DOTELEM13]] +; CHECK-NEXT: [[DOTI2:%.*]] = add i32 [[DOTELEM02]], [[DOTELEM14]] +; CHECK-NEXT: [[DOTUPTO015:%.*]] = insertelement <3 x i32> poison, i32 [[DOTI0]], i64 0 +; CHECK-NEXT: [[DOTUPTO116:%.*]] = insertelement <3 x i32> [[DOTUPTO015]], i32 [[DOTI1]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i32> [[DOTUPTO116]], i32 [[DOTI2]], i64 2 +; CHECK-NEXT: ret <3 x i32> [[TMP1]] +; %hlsl.asuint = call { <3 x i32>, <3 x i32> } @llvm.dx.splitdouble.v3i32(<3 x double> %d) %1 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 0 %2 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 1 diff --git a/llvm/test/Transforms/Scalarizer/frexp.ll b/llvm/test/Transforms/Scalarizer/frexp.ll index 48159b45c1896..6397832f6648c 100644 --- a/llvm/test/Transforms/Scalarizer/frexp.ll +++ b/llvm/test/Transforms/Scalarizer/frexp.ll @@ -1,65 +1,91 @@ -; RUN: opt %s -passes='function(scalarizer)' -S | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt %s -passes='function(scalarizer)' -S | FileCheck %s + +define void @test_vector_frexp_void(<2 x double> noundef %d) { +; CHECK-LABEL: define void @test_vector_frexp_void( +; CHECK-SAME: <2 x double> noundef [[D:%.*]]) { +; CHECK-NEXT: [[D_I0:%.*]] = extractelement <2 x double> [[D]], i64 0 +; CHECK-NEXT: [[DOTI0:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[D_I0]]) +; CHECK-NEXT: [[D_I1:%.*]] = extractelement <2 x double> [[D]], i64 1 +; CHECK-NEXT: [[DOTI1:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[D_I1]]) +; CHECK-NEXT: ret void +; + %1 = call { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double> %d) + ret void +} -; CHECK-LABEL: @test_vector_half_frexp_half define noundef <2 x half> @test_vector_half_frexp_half(<2 x half> noundef %h) { - ; CHECK: [[ee0:%.*]] = extractelement <2 x half> %h, i64 0 - ; CHECK-NEXT: [[ie0:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[ee0]]) - ; CHECK-NEXT: [[ee1:%.*]] = extractelement <2 x half> %h, i64 1 - ; CHECK-NEXT: [[ie1:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[ee1]]) - ; CHECK-NEXT: [[ev00:%.*]] = extractvalue { half, i32 } [[ie0]], 0 - ; CHECK-NEXT: [[ev01:%.*]] = extractvalue { half, i32 } [[ie1]], 0 - ; CHECK-NEXT: insertelement <2 x half> poison, half [[ev00]], i64 0 - ; CHECK-NEXT: insertelement <2 x half> %{{.*}}, half [[ev01]], i64 1 +; CHECK-LABEL: define noundef <2 x half> @test_vector_half_frexp_half( +; CHECK-SAME: <2 x half> noundef [[H:%.*]]) { +; CHECK-NEXT: [[H_I0:%.*]] = extractelement <2 x half> [[H]], i64 0 +; CHECK-NEXT: [[R_I0:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[H_I0]]) +; CHECK-NEXT: [[H_I1:%.*]] = extractelement <2 x half> [[H]], i64 1 +; CHECK-NEXT: [[R_I1:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[H_I1]]) +; CHECK-NEXT: [[E0_ELEM0:%.*]] = extractvalue { half, i32 } [[R_I0]], 0 +; CHECK-NEXT: [[E0_ELEM01:%.*]] = extractvalue { half, i32 } [[R_I1]], 0 +; CHECK-NEXT: [[E0_UPTO0:%.*]] = insertelement <2 x half> poison, half [[E0_ELEM0]], i64 0 +; CHECK-NEXT: [[E0:%.*]] = insertelement <2 x half> [[E0_UPTO0]], half [[E0_ELEM01]], i64 1 +; CHECK-NEXT: ret <2 x half> [[E0]] +; %r = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x half> %h) %e0 = extractvalue { <2 x half>, <2 x i32> } %r, 0 ret <2 x half> %e0 } -; CHECK-LABEL: @test_vector_half_frexp_int define noundef <2 x i32> @test_vector_half_frexp_int(<2 x half> noundef %h) { - ; CHECK: [[ee0:%.*]] = extractelement <2 x half> %h, i64 0 - ; CHECK-NEXT: [[ie0:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[ee0]]) - ; CHECK-NEXT: [[ee1:%.*]] = extractelement <2 x half> %h, i64 1 - ; CHECK-NEXT: [[ie1:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[ee1]]) - ; CHECK-NEXT: [[ev10:%.*]] = extractvalue { half, i32 } [[ie0]], 1 - ; CHECK-NEXT: [[ev11:%.*]] = extractvalue { half, i32 } [[ie1]], 1 - ; CHECK-NEXT: insertelement <2 x i32> poison, i32 [[ev10]], i64 0 - ; CHECK-NEXT: insertelement <2 x i32> %{{.*}}, i32 [[ev11]], i64 1 +; CHECK-LABEL: define noundef <2 x i32> @test_vector_half_frexp_int( +; CHECK-SAME: <2 x half> noundef [[H:%.*]]) { +; CHECK-NEXT: [[H_I0:%.*]] = extractelement <2 x half> [[H]], i64 0 +; CHECK-NEXT: [[R_I0:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[H_I0]]) +; CHECK-NEXT: [[H_I1:%.*]] = extractelement <2 x half> [[H]], i64 1 +; CHECK-NEXT: [[R_I1:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[H_I1]]) +; CHECK-NEXT: [[E1_ELEM1:%.*]] = extractvalue { half, i32 } [[R_I0]], 1 +; CHECK-NEXT: [[E1_ELEM11:%.*]] = extractvalue { half, i32 } [[R_I1]], 1 +; CHECK-NEXT: [[E1_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[E1_ELEM1]], i64 0 +; CHECK-NEXT: [[E1:%.*]] = insertelement <2 x i32> [[E1_UPTO0]], i32 [[E1_ELEM11]], i64 1 +; CHECK-NEXT: ret <2 x i32> [[E1]] +; %r = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x half> %h) %e1 = extractvalue { <2 x half>, <2 x i32> } %r, 1 ret <2 x i32> %e1 } -; CHECK-LABEL: @test_vector_float_frexp_int define noundef <2 x float> @test_vector_float_frexp_int(<2 x float> noundef %f) { - ; CHECK: [[ee0:%.*]] = extractelement <2 x float> %f, i64 0 - ; CHECK-NEXT: [[ie0:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[ee0]]) - ; CHECK-NEXT: [[ee1:%.*]] = extractelement <2 x float> %f, i64 1 - ; CHECK-NEXT: [[ie1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[ee1]]) - ; CHECK-NEXT: [[ev00:%.*]] = extractvalue { float, i32 } [[ie0]], 0 - ; CHECK-NEXT: [[ev01:%.*]] = extractvalue { float, i32 } [[ie1]], 0 - ; CHECK-NEXT: insertelement <2 x float> poison, float [[ev00]], i64 0 - ; CHECK-NEXT: insertelement <2 x float> %{{.*}}, float [[ev01]], i64 1 - ; CHECK-NEXT: extractvalue { float, i32 } [[ie0]], 1 - ; CHECK-NEXT: extractvalue { float, i32 } [[ie1]], 1 +; CHECK-LABEL: define noundef <2 x float> @test_vector_float_frexp_int( +; CHECK-SAME: <2 x float> noundef [[F:%.*]]) { +; CHECK-NEXT: [[F_I0:%.*]] = extractelement <2 x float> [[F]], i64 0 +; CHECK-NEXT: [[DOTI0:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[F_I0]]) +; CHECK-NEXT: [[F_I1:%.*]] = extractelement <2 x float> [[F]], i64 1 +; CHECK-NEXT: [[DOTI1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[F_I1]]) +; CHECK-NEXT: [[DOTELEM0:%.*]] = extractvalue { float, i32 } [[DOTI0]], 0 +; CHECK-NEXT: [[DOTELEM01:%.*]] = extractvalue { float, i32 } [[DOTI1]], 0 +; CHECK-NEXT: [[DOTUPTO010:%.*]] = insertelement <2 x float> poison, float [[DOTELEM0]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> [[DOTUPTO010]], float [[DOTELEM01]], i64 1 +; CHECK-NEXT: [[DOTELEM1:%.*]] = extractvalue { float, i32 } [[DOTI0]], 1 +; CHECK-NEXT: [[DOTELEM12:%.*]] = extractvalue { float, i32 } [[DOTI1]], 1 +; CHECK-NEXT: ret <2 x float> [[TMP1]] +; %1 = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x float> %f) %2 = extractvalue { <2 x float>, <2 x i32> } %1, 0 %3 = extractvalue { <2 x float>, <2 x i32> } %1, 1 ret <2 x float> %2 } -; CHECK-LABEL: @test_vector_double_frexp_int define noundef <2 x double> @test_vector_double_frexp_int(<2 x double> noundef %d) { - ; CHECK: [[ee0:%.*]] = extractelement <2 x double> %d, i64 0 - ; CHECK-NEXT: [[ie0:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[ee0]]) - ; CHECK-NEXT: [[ee1:%.*]] = extractelement <2 x double> %d, i64 1 - ; CHECK-NEXT: [[ie1:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[ee1]]) - ; CHECK-NEXT: [[ev00:%.*]] = extractvalue { double, i32 } [[ie0]], 0 - ; CHECK-NEXT: [[ev01:%.*]] = extractvalue { double, i32 } [[ie1]], 0 - ; CHECK-NEXT: insertelement <2 x double> poison, double [[ev00]], i64 0 - ; CHECK-NEXT: insertelement <2 x double> %{{.*}}, double [[ev01]], i64 1 - ; CHECK-NEXT: extractvalue { double, i32 } [[ie0]], 1 - ; CHECK-NEXT: extractvalue { double, i32 } [[ie1]], 1 +; CHECK-LABEL: define noundef <2 x double> @test_vector_double_frexp_int( +; CHECK-SAME: <2 x double> noundef [[D:%.*]]) { +; CHECK-NEXT: [[D_I0:%.*]] = extractelement <2 x double> [[D]], i64 0 +; CHECK-NEXT: [[DOTI0:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[D_I0]]) +; CHECK-NEXT: [[D_I1:%.*]] = extractelement <2 x double> [[D]], i64 1 +; CHECK-NEXT: [[DOTI1:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[D_I1]]) +; CHECK-NEXT: [[DOTELEM0:%.*]] = extractvalue { double, i32 } [[DOTI0]], 0 +; CHECK-NEXT: [[DOTELEM01:%.*]] = extractvalue { double, i32 } [[DOTI1]], 0 +; CHECK-NEXT: [[DOTUPTO010:%.*]] = insertelement <2 x double> poison, double [[DOTELEM0]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[DOTUPTO010]], double [[DOTELEM01]], i64 1 +; CHECK-NEXT: [[DOTELEM1:%.*]] = extractvalue { double, i32 } [[DOTI0]], 1 +; CHECK-NEXT: [[DOTELEM12:%.*]] = extractvalue { double, i32 } [[DOTI1]], 1 +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; %1 = call { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double> %d) %2 = extractvalue { <2 x double>, <2 x i32> } %1, 0 %3 = extractvalue { <2 x double>, <2 x i32> } %1, 1 From 9cdfbc6a0c6ad8468fce1ca3b78ca342dbf2d40d Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Thu, 17 Oct 2024 23:48:01 -0400 Subject: [PATCH 7/9] address Tex's PR comments --- llvm/include/llvm/Analysis/VectorUtils.h | 5 +++++ llvm/lib/Analysis/VectorUtils.cpp | 10 ++++++++++ llvm/lib/Transforms/Scalar/Scalarizer.cpp | 14 ++++++-------- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index e2dd4976f3906..467d5932cacf9 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -154,6 +154,11 @@ bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, /// the operand at index \p OpdIdx, or on the return type if \p OpdIdx is -1. bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx); +/// Identifies if the vector form of the intrinsic that returns a struct is +/// overloaded at the struct element index \p RetIdx. +bool isVectorIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, + int RetIdx); + /// Returns intrinsic ID for call. /// For the input call instruction it finds mapping intrinsic and returns /// its intrinsic ID, in case it does not found it return not_intrinsic. diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 6b5251e0ad34e..37c443011719b 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -152,6 +152,16 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, } } +bool llvm::isVectorIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, + int RetIdx) { + switch (ID) { + case Intrinsic::frexp: + return RetIdx == 0 || RetIdx == 1; + default: + return RetIdx == 0; + } +} + /// Returns intrinsic ID for call. /// For the input call instruction it finds mapping intrinsic and returns /// its ID, in case it does not found it return not_intrinsic. diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp index f708eeeb29d31..b36423e0fea40 100644 --- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -700,9 +700,9 @@ bool ScalarizerVisitor::isTriviallyScalarizable(Intrinsic::ID ID) { /// element if possible for the intrinsic. bool ScalarizerVisitor::splitCall(CallInst &CI) { Type *CallType = CI.getType(); - bool AreAllMatchingVectors = isStructOfMatchingFixedVectors(CallType); + bool AreAllVectorsOfMatchingSize = isStructOfMatchingFixedVectors(CallType); std::optional VS; - if (AreAllMatchingVectors) + if (AreAllVectorsOfMatchingSize) VS = getVectorSplit(CallType->getContainedType(0)); else VS = getVectorSplit(CallType); @@ -730,19 +730,17 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) { if (isVectorIntrinsicWithOverloadTypeAtArg(ID, -1)) Tys.push_back(VS->SplitTy); - if (AreAllMatchingVectors) { - Type *PrevType = CallType->getContainedType(0); + if (AreAllVectorsOfMatchingSize) { for (unsigned I = 1; I < CallType->getNumContainedTypes(); I++) { - Type *CurrType = cast(CallType->getContainedType(I)); - if (PrevType != CurrType) { - std::optional CurrVS = getVectorSplit(CurrType); + if (isVectorIntrinsicWithStructReturnOverloadAtField(ID, I)) { + std::optional CurrVS = getVectorSplit( + cast(CallType->getContainedType(I))); // This case does not seem to happen, but it is possible for // VectorSplit.NumPacked >= NumElems. If that happens a VectorSplit // is not returned and we will bailout of handling this call. if (!CurrVS) return false; Tys.push_back(CurrVS->SplitTy); - PrevType = CurrType; } } } From ee16a4b6d5d6a81728c3f541f146d82fc9c99c00 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Fri, 18 Oct 2024 16:09:47 -0400 Subject: [PATCH 8/9] address Tex's PR comments --- llvm/lib/Transforms/Scalar/Scalarizer.cpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp index b36423e0fea40..868bfbef1010d 100644 --- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -732,16 +732,15 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) { if (AreAllVectorsOfMatchingSize) { for (unsigned I = 1; I < CallType->getNumContainedTypes(); I++) { - if (isVectorIntrinsicWithStructReturnOverloadAtField(ID, I)) { - std::optional CurrVS = getVectorSplit( - cast(CallType->getContainedType(I))); - // This case does not seem to happen, but it is possible for - // VectorSplit.NumPacked >= NumElems. If that happens a VectorSplit - // is not returned and we will bailout of handling this call. - if (!CurrVS) - return false; + std::optional CurrVS = + getVectorSplit(cast(CallType->getContainedType(I))); + // This case does not seem to happen, but it is possible for + // VectorSplit.NumPacked >= NumElems. If that happens a VectorSplit + // is not returned and we will bailout of handling this call. + if (!CurrVS) + return false; + if (isVectorIntrinsicWithStructReturnOverloadAtField(ID, I)) Tys.push_back(CurrVS->SplitTy); - } } } // Assumes that any vector type has the same number of elements as the return From e00e740a7e5f5f4618787003bdf3002145fbd8bf Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Fri, 18 Oct 2024 17:18:43 -0400 Subject: [PATCH 9/9] add CurrVS->NumPacked != VS->NumPacked check --- llvm/lib/Transforms/Scalar/Scalarizer.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp index 868bfbef1010d..772f4c6c35dde 100644 --- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -737,7 +737,11 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) { // This case does not seem to happen, but it is possible for // VectorSplit.NumPacked >= NumElems. If that happens a VectorSplit // is not returned and we will bailout of handling this call. - if (!CurrVS) + // The secondary bailout case is if NumPacked does not match. + // This can happen if ScalarizeMinBits is not set to the default. + // This means with certain ScalarizeMinBits intrinsics like frexp + // will only scalarize when the struct elements have the same bitness. + if (!CurrVS || CurrVS->NumPacked != VS->NumPacked) return false; if (isVectorIntrinsicWithStructReturnOverloadAtField(ID, I)) Tys.push_back(CurrVS->SplitTy);