From 9a5042f54f4b1a256b5e3724137b09aa95e8f5ff Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi@microsoft.com>
Date: Mon, 30 Sep 2024 10:11:38 -0400
Subject: [PATCH 1/9] [Scalarizer] Allow the scalarizer pass to scalarize
 structs of vectors

---
 llvm/include/llvm/IR/IntrinsicsDirectX.td     |  3 +
 .../DirectX/DirectXTargetTransformInfo.cpp    | 77 ++++++++++---------
 llvm/lib/Transforms/Scalar/Scalarizer.cpp     | 69 ++++++++++++++++-
 llvm/test/CodeGen/DirectX/split-double.ll     | 10 +++
 4 files changed, 119 insertions(+), 40 deletions(-)
 create mode 100644 llvm/test/CodeGen/DirectX/split-double.ll

diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 27a437a83be6d..d8dd2beca4899 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -89,5 +89,8 @@ def int_dx_wave_is_first_lane : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrCon
 def int_dx_wave_readlane : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
 def int_dx_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>;
 def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>;
+def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>], 
+    [LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>;
+
 def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
 }
diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
index 8ea31401121bc..231afd8ae3eea 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
@@ -1,38 +1,39 @@
-//===- DirectXTargetTransformInfo.cpp - DirectX TTI ---------------*- C++
-//-*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-//===----------------------------------------------------------------------===//
-
-#include "DirectXTargetTransformInfo.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IntrinsicsDirectX.h"
-
-using namespace llvm;
-
-bool DirectXTTIImpl::isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
-                                                        unsigned ScalarOpdIdx) {
-  switch (ID) {
-  case Intrinsic::dx_wave_readlane:
-    return ScalarOpdIdx == 1;
-  default:
-    return false;
-  }
-}
-
-bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
-    Intrinsic::ID ID) const {
-  switch (ID) {
-  case Intrinsic::dx_frac:
-  case Intrinsic::dx_rsqrt:
-  case Intrinsic::dx_wave_readlane:
-    return true;
-  default:
-    return false;
-  }
-}
+//===- DirectXTargetTransformInfo.cpp - DirectX TTI ---------------*- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+//===----------------------------------------------------------------------===//
+
+#include "DirectXTargetTransformInfo.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsDirectX.h"
+
+using namespace llvm;
+
+bool DirectXTTIImpl::isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
+                                                        unsigned ScalarOpdIdx) {
+  switch (ID) {
+  case Intrinsic::dx_wave_readlane:
+    return ScalarOpdIdx == 1;
+  default:
+    return false;
+  }
+}
+
+bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
+    Intrinsic::ID ID) const {
+  switch (ID) {
+  case Intrinsic::dx_frac:
+  case Intrinsic::dx_rsqrt:
+  case Intrinsic::dx_wave_readlane:
+  case Intrinsic::dx_splitdouble:
+    return true;
+  default:
+    return false;
+  }
+}
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index b1e4c7e52d99a..5a7253915695b 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -197,6 +197,11 @@ struct VectorLayout {
   uint64_t SplitSize = 0;
 };
 
+static bool isStructOfVectors(Type *Ty) {
+  return isa<StructType>(Ty) && Ty->getNumContainedTypes() > 0 &&
+         isa<FixedVectorType>(Ty->getContainedType(0));
+}
+
 /// Concatenate the given fragments to a single vector value of the type
 /// described in @p VS.
 static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
@@ -276,6 +281,7 @@ class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
   bool visitBitCastInst(BitCastInst &BCI);
   bool visitInsertElementInst(InsertElementInst &IEI);
   bool visitExtractElementInst(ExtractElementInst &EEI);
+  bool visitExtractValueInst(ExtractValueInst &EVI);
   bool visitShuffleVectorInst(ShuffleVectorInst &SVI);
   bool visitPHINode(PHINode &PHI);
   bool visitLoadInst(LoadInst &LI);
@@ -552,7 +558,10 @@ void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op,
 // Determine how Ty is split, if at all.
 std::optional<VectorSplit> ScalarizerVisitor::getVectorSplit(Type *Ty) {
   VectorSplit Split;
-  Split.VecTy = dyn_cast<FixedVectorType>(Ty);
+  if (isStructOfVectors(Ty))
+    Split.VecTy = cast<FixedVectorType>(Ty->getContainedType(0));
+  else
+    Split.VecTy = dyn_cast<FixedVectorType>(Ty);
   if (!Split.VecTy)
     return {};
 
@@ -1030,6 +1039,33 @@ bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
   return true;
 }
 
+bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) {
+  Value *Op = EVI.getOperand(0);
+  Type *OpTy = Op->getType();
+  ValueVector Res;
+  if (!isStructOfVectors(OpTy))
+    return false;
+  // Note: isStructOfVectors is also used in getVectorSplit.
+  // The intent is to bail on this visit if it isn't a struct
+  // of vectors. Downside is that when it is true we do two
+  // isStructOfVectors calls.
+  std::optional<VectorSplit> VS = getVectorSplit(OpTy);
+  if (!VS)
+    return false;
+  Scatterer Op0 = scatter(&EVI, Op, *VS);
+  assert(!EVI.getIndices().empty() && "Make sure an index exists");
+  // Note for our use case we only care about the top level index.
+  unsigned Index = EVI.getIndices()[0];
+  for (unsigned OpIdx = 0; OpIdx < Op0.size(); ++OpIdx) {
+    Value *ResElem = Builder.CreateExtractValue(
+        Op0[OpIdx], Index, EVI.getName() + ".elem" + std::to_string(Index));
+    Res.push_back(ResElem);
+  }
+  // replaceUses(&EVI, Res);
+  gather(&EVI, Res, *VS);
+  return true;
+}
+
 bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
   std::optional<VectorSplit> VS = getVectorSplit(EEI.getOperand(0)->getType());
   if (!VS)
@@ -1196,7 +1232,7 @@ bool ScalarizerVisitor::finish() {
     if (!Op->use_empty()) {
       // The value is still needed, so recreate it using a series of
       // insertelements and/or shufflevectors.
-      Value *Res;
+      Value *Res = nullptr;
       if (auto *Ty = dyn_cast<FixedVectorType>(Op->getType())) {
         BasicBlock *BB = Op->getParent();
         IRBuilder<> Builder(Op);
@@ -1209,6 +1245,35 @@ bool ScalarizerVisitor::finish() {
         Res = concatenate(Builder, CV, VS, Op->getName());
 
         Res->takeName(Op);
+      } else if (auto *Ty = dyn_cast<StructType>(Op->getType())) {
+        BasicBlock *BB = Op->getParent();
+        IRBuilder<> Builder(Op);
+        if (isa<PHINode>(Op))
+          Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
+
+        // Iterate over each element in the struct
+        uint NumOfStructElements = Ty->getNumElements();
+        SmallVector<ValueVector, 4> ElemCV(NumOfStructElements);
+        for (unsigned I = 0; I < NumOfStructElements; ++I) {
+          for (auto *CVelem : CV) {
+            Value *Elem = Builder.CreateExtractValue(
+                CVelem, I, Op->getName() + ".elem" + std::to_string(I));
+            ElemCV[I].push_back(Elem);
+          }
+        }
+        Res = PoisonValue::get(Ty);
+        for (unsigned I = 0; I < NumOfStructElements; ++I) {
+          Type *ElemTy = Ty->getElementType(I);
+          assert(isa<FixedVectorType>(ElemTy) &&
+                 "Only Structs of all FixedVectorType supported");
+          VectorSplit VS = *getVectorSplit(ElemTy);
+          assert(VS.NumFragments == CV.size());
+
+          Value *ConcatenatedVector =
+              concatenate(Builder, ElemCV[I], VS, Op->getName());
+          Res = Builder.CreateInsertValue(Res, ConcatenatedVector, I,
+                                          Op->getName() + ".insert");
+        }
       } else {
         assert(CV.size() == 1 && Op->getType() == CV[0]->getType());
         Res = CV[0];
diff --git a/llvm/test/CodeGen/DirectX/split-double.ll b/llvm/test/CodeGen/DirectX/split-double.ll
new file mode 100644
index 0000000000000..7d3c28efbc63c
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/split-double.ll
@@ -0,0 +1,10 @@
+
+; RUN: opt -S -scalarizer -scalarize-load-store -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+define noundef <3 x i32> @test_vector_double_split(<3 x double> noundef %d) local_unnamed_addr {
+    %hlsl.asuint = call { <3 x i32>, <3 x i32> }  @llvm.dx.splitdouble.v3i32(<3 x double> %d)
+    %1 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 0
+    %2 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 1
+    %3 = add <3 x i32> %1, %2
+    ret <3 x i32> %3
+}

From da2e3c16aaea1168ba0324258fbc359a82e1d0a3 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzon@farzon.org>
Date: Tue, 8 Oct 2024 00:59:15 -0400
Subject: [PATCH 2/9] Add support for frexp. Move the struct-of-vectors lookup
 into just the CallInst and ExtractValue instruction visits

---
 llvm/include/llvm/IR/IntrinsicsDirectX.td |  1 -
 llvm/lib/Transforms/Scalar/Scalarizer.cpp | 53 ++++++++++++------
 llvm/test/CodeGen/DirectX/split-double.ll | 36 ++++++++++---
 llvm/test/Transforms/Scalarizer/frexp.ll  | 66 +++++++++++++++++++++++
 4 files changed, 133 insertions(+), 23 deletions(-)
 create mode 100644 llvm/test/Transforms/Scalarizer/frexp.ll

diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index d8dd2beca4899..e30d37f69f781 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -91,6 +91,5 @@ def int_dx_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32
 def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>;
 def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>], 
     [LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>;
-
 def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
 }
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 5a7253915695b..f91d7ba755ac6 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -197,9 +197,15 @@ struct VectorLayout {
   uint64_t SplitSize = 0;
 };
 
-static bool isStructOfVectors(Type *Ty) {
-  return isa<StructType>(Ty) && Ty->getNumContainedTypes() > 0 &&
-         isa<FixedVectorType>(Ty->getContainedType(0));
+static bool isStructAllVectors(Type *Ty) {
+  if (!isa<StructType>(Ty))
+    return false;
+
+  for(unsigned I = 0; I < Ty->getNumContainedTypes(); I++)
+    if (!isa<FixedVectorType>(Ty->getContainedType(I)))
+      return false;
+
+  return true;
 }
 
 /// Concatenate the given fragments to a single vector value of the type
@@ -558,10 +564,7 @@ void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op,
 // Determine how Ty is split, if at all.
 std::optional<VectorSplit> ScalarizerVisitor::getVectorSplit(Type *Ty) {
   VectorSplit Split;
-  if (isStructOfVectors(Ty))
-    Split.VecTy = cast<FixedVectorType>(Ty->getContainedType(0));
-  else
-    Split.VecTy = dyn_cast<FixedVectorType>(Ty);
+  Split.VecTy = dyn_cast<FixedVectorType>(Ty);
   if (!Split.VecTy)
     return {};
 
@@ -676,6 +679,10 @@ bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
 bool ScalarizerVisitor::isTriviallyScalarizable(Intrinsic::ID ID) {
   if (isTriviallyVectorizable(ID))
     return true;
+  switch (ID) {
+    case Intrinsic::frexp:
+    return true;
+  }
   return Intrinsic::isTargetIntrinsic(ID) &&
          TTI->isTargetIntrinsicTriviallyScalarizable(ID);
 }
@@ -683,7 +690,13 @@ bool ScalarizerVisitor::isTriviallyScalarizable(Intrinsic::ID ID) {
 /// If a call to a vector typed intrinsic function, split into a scalar call per
 /// element if possible for the intrinsic.
 bool ScalarizerVisitor::splitCall(CallInst &CI) {
-  std::optional<VectorSplit> VS = getVectorSplit(CI.getType());
+  Type* CallType = CI.getType();
+  bool areAllVectors = isStructAllVectors(CallType);
+   std::optional<VectorSplit> VS;
+  if (areAllVectors)
+    VS = getVectorSplit(CallType->getContainedType(0));
+  else
+    VS = getVectorSplit(CallType);
   if (!VS)
     return false;
 
@@ -708,6 +721,18 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
   if (isVectorIntrinsicWithOverloadTypeAtArg(ID, -1))
     Tys.push_back(VS->SplitTy);
 
+  if(areAllVectors) {
+    Type* PrevType = CallType->getContainedType(0);
+    Type* CallType = CI.getType();
+    for(unsigned I = 1; I < CallType->getNumContainedTypes(); I++) {
+      Type* CurrType = cast<FixedVectorType>(CallType->getContainedType(I));
+      if(PrevType != CurrType) {
+        std::optional<VectorSplit> CurrVS = getVectorSplit(CurrType);
+        Tys.push_back(CurrVS->SplitTy);
+        PrevType = CurrType;
+      }
+    }
+  }
   // Assumes that any vector type has the same number of elements as the return
   // vector type, which is true for all current intrinsics.
   for (unsigned I = 0; I != NumArgs; ++I) {
@@ -1043,15 +1068,13 @@ bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) {
   Value *Op = EVI.getOperand(0);
   Type *OpTy = Op->getType();
   ValueVector Res;
-  if (!isStructOfVectors(OpTy))
+  if (!isStructAllVectors(OpTy))
     return false;
-  // Note: isStructOfVectors is also used in getVectorSplit.
-  // The intent is to bail on this visit if it isn't a struct
-  // of vectors. Downside is that when it is true we do two
-  // isStructOfVectors calls.
-  std::optional<VectorSplit> VS = getVectorSplit(OpTy);
+  Type* VecType = cast<FixedVectorType>(OpTy->getContainedType(0));
+  std::optional<VectorSplit> VS = getVectorSplit(VecType);
   if (!VS)
     return false;
+  IRBuilder<> Builder(&EVI);
   Scatterer Op0 = scatter(&EVI, Op, *VS);
   assert(!EVI.getIndices().empty() && "Make sure an index exists");
   // Note for our use case we only care about the top level index.
@@ -1252,7 +1275,7 @@ bool ScalarizerVisitor::finish() {
           Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
 
         // Iterate over each element in the struct
-        uint NumOfStructElements = Ty->getNumElements();
+        unsigned NumOfStructElements = Ty->getNumElements();
         SmallVector<ValueVector, 4> ElemCV(NumOfStructElements);
         for (unsigned I = 0; I < NumOfStructElements; ++I) {
           for (auto *CVelem : CV) {
diff --git a/llvm/test/CodeGen/DirectX/split-double.ll b/llvm/test/CodeGen/DirectX/split-double.ll
index 7d3c28efbc63c..4fc5fdd1922a2 100644
--- a/llvm/test/CodeGen/DirectX/split-double.ll
+++ b/llvm/test/CodeGen/DirectX/split-double.ll
@@ -1,10 +1,32 @@
+; RUN: opt -S -scalarizer  -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
 
-; RUN: opt -S -scalarizer -scalarize-load-store -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+define void @test_vector_double_split_void(<3 x double> noundef %d) {
+  %hlsl.asuint = call { <3 x i32>, <3 x i32> }  @llvm.dx.splitdouble.v3i32(<3 x double> %d)
+  ret void
+}
 
-define noundef <3 x i32> @test_vector_double_split(<3 x double> noundef %d) local_unnamed_addr {
-    %hlsl.asuint = call { <3 x i32>, <3 x i32> }  @llvm.dx.splitdouble.v3i32(<3 x double> %d)
-    %1 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 0
-    %2 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 1
-    %3 = add <3 x i32> %1, %2
-    ret <3 x i32> %3
+define noundef <3 x i32> @test_vector_double_split(<3 x double> noundef %d) {
+  ; CHECK: [[ee0:%.*]] = extractelement <3 x double> %d, i64 0
+  ; CHECK: [[ie0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[ee0]])
+  ; CHECK: [[ee1:%.*]] = extractelement <3 x double> %d, i64 1
+  ; CHECK: [[ie1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[ee1]])
+  ; CHECK: [[ee2:%.*]] = extractelement <3 x double> %d, i64 2
+  ; CHECK: [[ie2:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[ee2]])
+  ; CHECK: [[ev00:%.*]] = extractvalue { i32, i32 } [[ie0]], 0
+  ; CHECK: [[ev01:%.*]] = extractvalue { i32, i32 } [[ie1]], 0
+  ; CHECK: [[ev02:%.*]] = extractvalue { i32, i32 } [[ie2]], 0
+  ; CHECK: [[ev10:%.*]] = extractvalue { i32, i32 } [[ie0]], 1
+  ; CHECK: [[ev11:%.*]] = extractvalue { i32, i32 } [[ie1]], 1
+  ; CHECK: [[ev12:%.*]] = extractvalue { i32, i32 } [[ie2]], 1
+  ; CHECK: [[add1:%.*]] = add i32 [[ev00]], [[ev10]]
+  ; CHECK: [[add2:%.*]] = add i32 [[ev01]], [[ev11]]
+  ; CHECK: [[add3:%.*]] = add i32 [[ev02]], [[ev12]]
+  ; CHECK: insertelement <3 x i32> poison, i32 [[add1]], i64 0
+  ; CHECK: insertelement <3 x i32> %{{.*}}, i32 [[add2]], i64 1
+  ; CHECK: insertelement <3 x i32> %{{.*}}, i32 [[add3]], i64 2
+  %hlsl.asuint = call { <3 x i32>, <3 x i32> }  @llvm.dx.splitdouble.v3i32(<3 x double> %d)
+  %1 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 0
+  %2 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 1
+  %3 = add <3 x i32> %1, %2
+  ret <3 x i32> %3
 }
diff --git a/llvm/test/Transforms/Scalarizer/frexp.ll b/llvm/test/Transforms/Scalarizer/frexp.ll
new file mode 100644
index 0000000000000..454042e6887c3
--- /dev/null
+++ b/llvm/test/Transforms/Scalarizer/frexp.ll
@@ -0,0 +1,66 @@
+; RUN: opt %s -passes='function(scalarizer<load-store>)' -S | FileCheck %s
+
+; CHECK-LABEL: @test_vector_half_frexp_half
+define noundef <2 x half> @test_vector_half_frexp_half(<2 x half> noundef %h) {
+  ; CHECK: [[ee0:%.*]] = extractelement <2 x half> %h, i64 0
+  ; CHECK-NEXT: [[ie0:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[ee0]])
+  ; CHECK-NEXT: [[ee1:%.*]] = extractelement <2 x half> %h, i64 1
+  ; CHECK-NEXT: [[ie1:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[ee1]])
+  ; CHECK-NEXT: [[ev00:%.*]] = extractvalue { half, i32 } [[ie0]], 0
+  ; CHECK-NEXT: [[ev01:%.*]] = extractvalue { half, i32 } [[ie1]], 0
+  ; CHECK-NEXT: insertelement <2 x half> poison, half [[ev00]], i64 0
+  ; CHECK-NEXT: insertelement <2 x half> %{{.*}}, half [[ev01]], i64 1
+  %r =  call { <2 x half>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x half> %h)
+  %e0 = extractvalue { <2 x half>, <2 x i32> } %r, 0
+  ret <2 x half> %e0
+}
+
+; CHECK-LABEL: @test_vector_half_frexp_int
+define noundef <2 x i32> @test_vector_half_frexp_int(<2 x half> noundef %h) {
+  ; CHECK: [[ee0:%.*]] = extractelement <2 x half> %h, i64 0
+  ; CHECK-NEXT: [[ie0:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[ee0]])
+  ; CHECK-NEXT: [[ee1:%.*]] = extractelement <2 x half> %h, i64 1
+  ; CHECK-NEXT: [[ie1:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[ee1]])
+  ; CHECK-NEXT: [[ev10:%.*]] = extractvalue { half, i32 } [[ie0]], 1
+  ; CHECK-NEXT: [[ev11:%.*]] = extractvalue { half, i32 } [[ie1]], 1
+  ; CHECK-NEXT: insertelement <2 x i32> poison, i32 [[ev10]], i64 0
+  ; CHECK-NEXT: insertelement <2 x i32> %{{.*}}, i32 [[ev11]], i64 1
+  %r =  call { <2 x half>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x half> %h)
+  %e1 = extractvalue { <2 x half>, <2 x i32> } %r, 1
+  ret <2 x i32> %e1
+}
+
+
+define noundef <2 x float> @test_vector_float_frexp_int(<2 x float> noundef %f) {
+  ; CHECK: [[ee0:%.*]] = extractelement <2 x float> %f, i64 0
+  ; CHECK-NEXT: [[ie0:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[ee0]])
+  ; CHECK-NEXT: [[ee1:%.*]] = extractelement <2 x float> %f, i64 1
+  ; CHECK-NEXT: [[ie1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[ee1]])
+  ; CHECK-NEXT: [[ev00:%.*]] = extractvalue { float, i32 } [[ie0]], 0
+  ; CHECK-NEXT: [[ev01:%.*]] = extractvalue { float, i32 } [[ie1]], 0
+  ; CHECK-NEXT: insertelement <2 x float> poison, float [[ev00]], i64 0
+  ; CHECK-NEXT: insertelement <2 x float> %{{.*}}, float [[ev01]], i64 1
+  ; CHECK-NEXT: extractvalue { float, i32 } [[ie0]], 1
+  ; CHECK-NEXT: extractvalue { float, i32 } [[ie1]], 1
+  %1 =  call { <2 x float>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x float> %f)
+  %2 = extractvalue { <2 x float>, <2 x i32> } %1, 0
+  %3 = extractvalue { <2 x float>, <2 x i32> } %1, 1
+  ret <2 x float> %2
+}
+
+define noundef <2 x double> @test_vector_double_frexp_int(<2 x double> noundef %d) {
+  ; CHECK: [[ee0:%.*]] = extractelement <2 x double> %d, i64 0
+  ; CHECK-NEXT: [[ie0:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[ee0]])
+  ; CHECK-NEXT: [[ee1:%.*]] = extractelement <2 x double> %d, i64 1
+  ; CHECK-NEXT: [[ie1:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[ee1]])
+  ; CHECK-NEXT: [[ev00:%.*]] = extractvalue { double, i32 } [[ie0]], 0
+  ; CHECK-NEXT: [[ev01:%.*]] = extractvalue { double, i32 } [[ie1]], 0
+  ; CHECK-NEXT: insertelement <2 x double> poison, double [[ev00]], i64 0
+  ; CHECK-NEXT: insertelement <2 x double> %{{.*}}, double [[ev01]], i64 1
+  ; CHECK-NEXT: extractvalue { double, i32 } [[ie0]], 1
+  ; CHECK-NEXT: extractvalue { double, i32 } [[ie1]], 1
+  %1 =  call { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double> %d)
+  %2 = extractvalue { <2 x double>, <2 x i32> } %1, 0
+  %3 = extractvalue { <2 x double>, <2 x i32> } %1, 1
+  ret <2 x double> %2
+}

From c5383f3efca4577a8a0197b17a2d8087f9e06f9f Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzon@farzon.org>
Date: Tue, 8 Oct 2024 03:44:57 -0400
Subject: [PATCH 3/9] fix up in prep for PR.

---
 llvm/lib/Transforms/Scalar/Scalarizer.cpp | 41 +++++++++++++----------
 llvm/test/CodeGen/DirectX/split-double.ll | 14 ++++++--
 llvm/test/Transforms/Scalarizer/frexp.ll  |  3 +-
 3 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index f91d7ba755ac6..f66f13dba6358 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -200,11 +200,17 @@ struct VectorLayout {
 static bool isStructAllVectors(Type *Ty) {
   if (!isa<StructType>(Ty))
     return false;
-
-  for(unsigned I = 0; I < Ty->getNumContainedTypes(); I++)
-    if (!isa<FixedVectorType>(Ty->getContainedType(I)))
+  if (Ty->getNumContainedTypes() < 1)
+    return false;
+  FixedVectorType *VecTy = dyn_cast<FixedVectorType>(Ty->getContainedType(0));
+  if (!VecTy)
+    return false;
+  unsigned VecSize = VecTy->getNumElements();
+  for (unsigned I = 1; I < Ty->getNumContainedTypes(); I++) {
+    VecTy = dyn_cast<FixedVectorType>(Ty->getContainedType(I));
+    if (!VecTy || VecSize != VecTy->getNumElements())
       return false;
-
+  }
   return true;
 }
 
@@ -679,8 +685,9 @@ bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
 bool ScalarizerVisitor::isTriviallyScalarizable(Intrinsic::ID ID) {
   if (isTriviallyVectorizable(ID))
     return true;
+  // TODO: investigate vectorizable frexp
   switch (ID) {
-    case Intrinsic::frexp:
+  case Intrinsic::frexp:
     return true;
   }
   return Intrinsic::isTargetIntrinsic(ID) &&
@@ -690,10 +697,10 @@ bool ScalarizerVisitor::isTriviallyScalarizable(Intrinsic::ID ID) {
 /// If a call to a vector typed intrinsic function, split into a scalar call per
 /// element if possible for the intrinsic.
 bool ScalarizerVisitor::splitCall(CallInst &CI) {
-  Type* CallType = CI.getType();
-  bool areAllVectors = isStructAllVectors(CallType);
-   std::optional<VectorSplit> VS;
-  if (areAllVectors)
+  Type *CallType = CI.getType();
+  bool AreAllVectors = isStructAllVectors(CallType);
+  std::optional<VectorSplit> VS;
+  if (AreAllVectors)
     VS = getVectorSplit(CallType->getContainedType(0));
   else
     VS = getVectorSplit(CallType);
@@ -721,12 +728,12 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
   if (isVectorIntrinsicWithOverloadTypeAtArg(ID, -1))
     Tys.push_back(VS->SplitTy);
 
-  if(areAllVectors) {
-    Type* PrevType = CallType->getContainedType(0);
-    Type* CallType = CI.getType();
-    for(unsigned I = 1; I < CallType->getNumContainedTypes(); I++) {
-      Type* CurrType = cast<FixedVectorType>(CallType->getContainedType(I));
-      if(PrevType != CurrType) {
+  if (AreAllVectors) {
+    Type *PrevType = CallType->getContainedType(0);
+    Type *CallType = CI.getType();
+    for (unsigned I = 1; I < CallType->getNumContainedTypes(); I++) {
+      Type *CurrType = cast<FixedVectorType>(CallType->getContainedType(I));
+      if (PrevType != CurrType) {
         std::optional<VectorSplit> CurrVS = getVectorSplit(CurrType);
         Tys.push_back(CurrVS->SplitTy);
         PrevType = CurrType;
@@ -1070,7 +1077,7 @@ bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) {
   ValueVector Res;
   if (!isStructAllVectors(OpTy))
     return false;
-  Type* VecType = cast<FixedVectorType>(OpTy->getContainedType(0));
+  Type *VecType = cast<FixedVectorType>(OpTy->getContainedType(0));
   std::optional<VectorSplit> VS = getVectorSplit(VecType);
   if (!VS)
     return false;
@@ -1084,7 +1091,7 @@ bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) {
         Op0[OpIdx], Index, EVI.getName() + ".elem" + std::to_string(Index));
     Res.push_back(ResElem);
   }
-  // replaceUses(&EVI, Res);
+
   gather(&EVI, Res, *VS);
   return true;
 }
diff --git a/llvm/test/CodeGen/DirectX/split-double.ll b/llvm/test/CodeGen/DirectX/split-double.ll
index 4fc5fdd1922a2..9b70e87ba4794 100644
--- a/llvm/test/CodeGen/DirectX/split-double.ll
+++ b/llvm/test/CodeGen/DirectX/split-double.ll
@@ -1,10 +1,18 @@
-; RUN: opt -S -scalarizer  -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -passes='function(scalarizer<load-store>)' -S -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
 
-define void @test_vector_double_split_void(<3 x double> noundef %d) {
-  %hlsl.asuint = call { <3 x i32>, <3 x i32> }  @llvm.dx.splitdouble.v3i32(<3 x double> %d)
+; CHECK-LABEL: @test_vector_double_split_void
+define void @test_vector_double_split_void(<2 x double> noundef %d) {
+  ; CHECK: [[ee0:%.*]] = extractelement <2 x double> %d, i64 0
+  ; CHECK: [[ie0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[ee0]])
+  ; CHECK: [[ee1:%.*]] = extractelement <2 x double> %d, i64 1
+  ; CHECK: [[ie1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[ee1]])
+  ; CHECK-NOT: extractvalue { i32, i32 } {{.*}}, 0
+  ; CHECK-NOT: insertelement <2 x i32> {{.*}}, i32 {{.*}}, i64 0
+  %hlsl.asuint = call { <2 x i32>, <2 x i32> }  @llvm.dx.splitdouble.v2i32(<2 x double> %d)
   ret void
 }
 
+; CHECK-LABEL: @test_vector_double_split
 define noundef <3 x i32> @test_vector_double_split(<3 x double> noundef %d) {
   ; CHECK: [[ee0:%.*]] = extractelement <3 x double> %d, i64 0
   ; CHECK: [[ie0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[ee0]])
diff --git a/llvm/test/Transforms/Scalarizer/frexp.ll b/llvm/test/Transforms/Scalarizer/frexp.ll
index 454042e6887c3..48159b45c1896 100644
--- a/llvm/test/Transforms/Scalarizer/frexp.ll
+++ b/llvm/test/Transforms/Scalarizer/frexp.ll
@@ -30,7 +30,7 @@ define noundef <2 x i32> @test_vector_half_frexp_int(<2 x half> noundef %h) {
   ret <2 x i32> %e1
 }
 
-
+; CHECK-LABEL: @test_vector_float_frexp_int
 define noundef <2 x float> @test_vector_float_frexp_int(<2 x float> noundef %f) {
   ; CHECK: [[ee0:%.*]] = extractelement <2 x float> %f, i64 0
   ; CHECK-NEXT: [[ie0:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[ee0]])
@@ -48,6 +48,7 @@ define noundef <2 x float> @test_vector_float_frexp_int(<2 x float> noundef %f)
   ret <2 x float> %2
 }
 
+; CHECK-LABEL: @test_vector_double_frexp_int
 define noundef <2 x double> @test_vector_double_frexp_int(<2 x double> noundef %d) {
   ; CHECK: [[ee0:%.*]] = extractelement <2 x double> %d, i64 0
   ; CHECK-NEXT: [[ie0:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[ee0]])

From 8f7ed3beec93d26576c8d06f501f319863e9b4e5 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzon@farzon.org>
Date: Tue, 15 Oct 2024 18:06:49 -0400
Subject: [PATCH 4/9] Address PR review comments

---
 llvm/lib/Transforms/Scalar/Scalarizer.cpp | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index f66f13dba6358..44e2122f10b4b 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -197,16 +197,17 @@ struct VectorLayout {
   uint64_t SplitSize = 0;
 };
 
-static bool isStructAllVectors(Type *Ty) {
+static bool isStructOfMatchingFixedVectors(Type *Ty) {
   if (!isa<StructType>(Ty))
     return false;
-  if (Ty->getNumContainedTypes() < 1)
+  unsigned StructSize = Ty->getNumContainedTypes();
+  if (StructSize < 1)
     return false;
   FixedVectorType *VecTy = dyn_cast<FixedVectorType>(Ty->getContainedType(0));
   if (!VecTy)
     return false;
   unsigned VecSize = VecTy->getNumElements();
-  for (unsigned I = 1; I < Ty->getNumContainedTypes(); I++) {
+  for (unsigned I = 1; I < StructSize; I++) {
     VecTy = dyn_cast<FixedVectorType>(Ty->getContainedType(I));
     if (!VecTy || VecSize != VecTy->getNumElements())
       return false;
@@ -685,7 +686,8 @@ bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
 bool ScalarizerVisitor::isTriviallyScalarizable(Intrinsic::ID ID) {
   if (isTriviallyVectorizable(ID))
     return true;
-  // TODO: investigate vectorizable frexp
+  // TODO: Move frexp to isTriviallyVectorizable.
+  // https://github.com/llvm/llvm-project/issues/112408
   switch (ID) {
   case Intrinsic::frexp:
     return true;
@@ -698,7 +700,7 @@ bool ScalarizerVisitor::isTriviallyScalarizable(Intrinsic::ID ID) {
 /// element if possible for the intrinsic.
 bool ScalarizerVisitor::splitCall(CallInst &CI) {
   Type *CallType = CI.getType();
-  bool AreAllVectors = isStructAllVectors(CallType);
+  bool AreAllVectors = isStructOfMatchingFixedVectors(CallType);
   std::optional<VectorSplit> VS;
   if (AreAllVectors)
     VS = getVectorSplit(CallType->getContainedType(0));
@@ -730,7 +732,6 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
 
   if (AreAllVectors) {
     Type *PrevType = CallType->getContainedType(0);
-    Type *CallType = CI.getType();
     for (unsigned I = 1; I < CallType->getNumContainedTypes(); I++) {
       Type *CurrType = cast<FixedVectorType>(CallType->getContainedType(I));
       if (PrevType != CurrType) {
@@ -1075,7 +1076,7 @@ bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) {
   Value *Op = EVI.getOperand(0);
   Type *OpTy = Op->getType();
   ValueVector Res;
-  if (!isStructAllVectors(OpTy))
+  if (!isStructOfMatchingFixedVectors(OpTy))
     return false;
   Type *VecType = cast<FixedVectorType>(OpTy->getContainedType(0));
   std::optional<VectorSplit> VS = getVectorSplit(VecType);
@@ -1262,7 +1263,7 @@ bool ScalarizerVisitor::finish() {
     if (!Op->use_empty()) {
       // The value is still needed, so recreate it using a series of
       // insertelements and/or shufflevectors.
-      Value *Res = nullptr;
+      Value *Res;
       if (auto *Ty = dyn_cast<FixedVectorType>(Op->getType())) {
         BasicBlock *BB = Op->getParent();
         IRBuilder<> Builder(Op);
@@ -1287,7 +1288,7 @@ bool ScalarizerVisitor::finish() {
         for (unsigned I = 0; I < NumOfStructElements; ++I) {
           for (auto *CVelem : CV) {
             Value *Elem = Builder.CreateExtractValue(
-                CVelem, I, Op->getName() + ".elem" + std::to_string(I));
+                CVelem, I, Op->getName() + ".elem" + Twine(I));
             ElemCV[I].push_back(Elem);
           }
         }

From dfa4963316ba4542190f2b0e8548defea4abd6c6 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzon@farzon.org>
Date: Wed, 16 Oct 2024 12:44:01 -0400
Subject: [PATCH 5/9] add safety check in case `getVectorSplit` fails.

---
 llvm/lib/Transforms/Scalar/Scalarizer.cpp | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 44e2122f10b4b..539c1568c46e7 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -700,9 +700,9 @@ bool ScalarizerVisitor::isTriviallyScalarizable(Intrinsic::ID ID) {
 /// element if possible for the intrinsic.
 bool ScalarizerVisitor::splitCall(CallInst &CI) {
   Type *CallType = CI.getType();
-  bool AreAllVectors = isStructOfMatchingFixedVectors(CallType);
+  bool AreAllMatchingVectors = isStructOfMatchingFixedVectors(CallType);
   std::optional<VectorSplit> VS;
-  if (AreAllVectors)
+  if (AreAllMatchingVectors)
     VS = getVectorSplit(CallType->getContainedType(0));
   else
     VS = getVectorSplit(CallType);
@@ -730,12 +730,17 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
   if (isVectorIntrinsicWithOverloadTypeAtArg(ID, -1))
     Tys.push_back(VS->SplitTy);
 
-  if (AreAllVectors) {
+  if (AreAllMatchingVectors) {
     Type *PrevType = CallType->getContainedType(0);
     for (unsigned I = 1; I < CallType->getNumContainedTypes(); I++) {
       Type *CurrType = cast<FixedVectorType>(CallType->getContainedType(I));
       if (PrevType != CurrType) {
         std::optional<VectorSplit> CurrVS = getVectorSplit(CurrType);
+        // This case does not seem to happen, but it is possible for
+        // VectorSplit.NumPacked >= NumElems. If that happens a VectorSplit
+        // is not returned and we will bailout of handling this call.
+        if (!CurrVS)
+          return false;
         Tys.push_back(CurrVS->SplitTy);
         PrevType = CurrType;
       }

From 5e8cb8bab1fc01ffd0950a40db36e7880d2b11b8 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi@microsoft.com>
Date: Thu, 17 Oct 2024 14:36:38 -0400
Subject: [PATCH 6/9] address pr comments

---
 llvm/lib/Transforms/Scalar/Scalarizer.cpp |   2 +-
 llvm/test/CodeGen/DirectX/split-double.ll |  59 ++++++------
 llvm/test/Transforms/Scalarizer/frexp.ll  | 108 ++++++++++++++--------
 3 files changed, 100 insertions(+), 69 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 539c1568c46e7..f708eeeb29d31 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -1094,7 +1094,7 @@ bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) {
   unsigned Index = EVI.getIndices()[0];
   for (unsigned OpIdx = 0; OpIdx < Op0.size(); ++OpIdx) {
     Value *ResElem = Builder.CreateExtractValue(
-        Op0[OpIdx], Index, EVI.getName() + ".elem" + std::to_string(Index));
+        Op0[OpIdx], Index, EVI.getName() + ".elem" + Twine(Index));
     Res.push_back(ResElem);
   }
 
diff --git a/llvm/test/CodeGen/DirectX/split-double.ll b/llvm/test/CodeGen/DirectX/split-double.ll
index 9b70e87ba4794..759590fa56279 100644
--- a/llvm/test/CodeGen/DirectX/split-double.ll
+++ b/llvm/test/CodeGen/DirectX/split-double.ll
@@ -1,37 +1,42 @@
-; RUN: opt -passes='function(scalarizer<load-store>)' -S -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes='function(scalarizer)' -S -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
 
-; CHECK-LABEL: @test_vector_double_split_void
 define void @test_vector_double_split_void(<2 x double> noundef %d) {
-  ; CHECK: [[ee0:%.*]] = extractelement <2 x double> %d, i64 0
-  ; CHECK: [[ie0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[ee0]])
-  ; CHECK: [[ee1:%.*]] = extractelement <2 x double> %d, i64 1
-  ; CHECK: [[ie1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[ee1]])
-  ; CHECK-NOT: extractvalue { i32, i32 } {{.*}}, 0
-  ; CHECK-NOT: insertelement <2 x i32> {{.*}}, i32 {{.*}}, i64 0
+; CHECK-LABEL: define void @test_vector_double_split_void(
+; CHECK-SAME: <2 x double> noundef [[D:%.*]]) {
+; CHECK-NEXT:    [[D_I0:%.*]] = extractelement <2 x double> [[D]], i64 0
+; CHECK-NEXT:    [[HLSL_ASUINT_I0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I0]])
+; CHECK-NEXT:    [[D_I1:%.*]] = extractelement <2 x double> [[D]], i64 1
+; CHECK-NEXT:    [[HLSL_ASUINT_I1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I1]])
+; CHECK-NEXT:    ret void
+;
   %hlsl.asuint = call { <2 x i32>, <2 x i32> }  @llvm.dx.splitdouble.v2i32(<2 x double> %d)
   ret void
 }
 
-; CHECK-LABEL: @test_vector_double_split
 define noundef <3 x i32> @test_vector_double_split(<3 x double> noundef %d) {
-  ; CHECK: [[ee0:%.*]] = extractelement <3 x double> %d, i64 0
-  ; CHECK: [[ie0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[ee0]])
-  ; CHECK: [[ee1:%.*]] = extractelement <3 x double> %d, i64 1
-  ; CHECK: [[ie1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[ee1]])
-  ; CHECK: [[ee2:%.*]] = extractelement <3 x double> %d, i64 2
-  ; CHECK: [[ie2:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[ee2]])
-  ; CHECK: [[ev00:%.*]] = extractvalue { i32, i32 } [[ie0]], 0
-  ; CHECK: [[ev01:%.*]] = extractvalue { i32, i32 } [[ie1]], 0
-  ; CHECK: [[ev02:%.*]] = extractvalue { i32, i32 } [[ie2]], 0
-  ; CHECK: [[ev10:%.*]] = extractvalue { i32, i32 } [[ie0]], 1
-  ; CHECK: [[ev11:%.*]] = extractvalue { i32, i32 } [[ie1]], 1
-  ; CHECK: [[ev12:%.*]] = extractvalue { i32, i32 } [[ie2]], 1
-  ; CHECK: [[add1:%.*]] = add i32 [[ev00]], [[ev10]]
-  ; CHECK: [[add2:%.*]] = add i32 [[ev01]], [[ev11]]
-  ; CHECK: [[add3:%.*]] = add i32 [[ev02]], [[ev12]]
-  ; CHECK: insertelement <3 x i32> poison, i32 [[add1]], i64 0
-  ; CHECK: insertelement <3 x i32> %{{.*}}, i32 [[add2]], i64 1
-  ; CHECK: insertelement <3 x i32> %{{.*}}, i32 [[add3]], i64 2
+; CHECK-LABEL: define noundef <3 x i32> @test_vector_double_split(
+; CHECK-SAME: <3 x double> noundef [[D:%.*]]) {
+; CHECK-NEXT:    [[D_I0:%.*]] = extractelement <3 x double> [[D]], i64 0
+; CHECK-NEXT:    [[HLSL_ASUINT_I0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I0]])
+; CHECK-NEXT:    [[D_I1:%.*]] = extractelement <3 x double> [[D]], i64 1
+; CHECK-NEXT:    [[HLSL_ASUINT_I1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I1]])
+; CHECK-NEXT:    [[D_I2:%.*]] = extractelement <3 x double> [[D]], i64 2
+; CHECK-NEXT:    [[HLSL_ASUINT_I2:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I2]])
+; CHECK-NEXT:    [[DOTELEM0:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 0
+; CHECK-NEXT:    [[DOTELEM01:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I1]], 0
+; CHECK-NEXT:    [[DOTELEM02:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I2]], 0
+; CHECK-NEXT:    [[DOTELEM1:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 1
+; CHECK-NEXT:    [[DOTELEM13:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I1]], 1
+; CHECK-NEXT:    [[DOTELEM14:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I2]], 1
+; CHECK-NEXT:    [[DOTI0:%.*]] = add i32 [[DOTELEM0]], [[DOTELEM1]]
+; CHECK-NEXT:    [[DOTI1:%.*]] = add i32 [[DOTELEM01]], [[DOTELEM13]]
+; CHECK-NEXT:    [[DOTI2:%.*]] = add i32 [[DOTELEM02]], [[DOTELEM14]]
+; CHECK-NEXT:    [[DOTUPTO015:%.*]] = insertelement <3 x i32> poison, i32 [[DOTI0]], i64 0
+; CHECK-NEXT:    [[DOTUPTO116:%.*]] = insertelement <3 x i32> [[DOTUPTO015]], i32 [[DOTI1]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <3 x i32> [[DOTUPTO116]], i32 [[DOTI2]], i64 2
+; CHECK-NEXT:    ret <3 x i32> [[TMP1]]
+;
   %hlsl.asuint = call { <3 x i32>, <3 x i32> }  @llvm.dx.splitdouble.v3i32(<3 x double> %d)
   %1 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 0
   %2 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 1
diff --git a/llvm/test/Transforms/Scalarizer/frexp.ll b/llvm/test/Transforms/Scalarizer/frexp.ll
index 48159b45c1896..6397832f6648c 100644
--- a/llvm/test/Transforms/Scalarizer/frexp.ll
+++ b/llvm/test/Transforms/Scalarizer/frexp.ll
@@ -1,65 +1,91 @@
-; RUN: opt %s -passes='function(scalarizer<load-store>)' -S | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt %s -passes='function(scalarizer)' -S | FileCheck %s
+
+define void @test_vector_frexp_void(<2 x double> noundef %d) {
+; CHECK-LABEL: define void @test_vector_frexp_void(
+; CHECK-SAME: <2 x double> noundef [[D:%.*]]) {
+; CHECK-NEXT:    [[D_I0:%.*]] = extractelement <2 x double> [[D]], i64 0
+; CHECK-NEXT:    [[DOTI0:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[D_I0]])
+; CHECK-NEXT:    [[D_I1:%.*]] = extractelement <2 x double> [[D]], i64 1
+; CHECK-NEXT:    [[DOTI1:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[D_I1]])
+; CHECK-NEXT:    ret void
+;
+  %1 =  call { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double> %d)
+  ret void
+}
 
-; CHECK-LABEL: @test_vector_half_frexp_half
 define noundef <2 x half> @test_vector_half_frexp_half(<2 x half> noundef %h) {
-  ; CHECK: [[ee0:%.*]] = extractelement <2 x half> %h, i64 0
-  ; CHECK-NEXT: [[ie0:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[ee0]])
-  ; CHECK-NEXT: [[ee1:%.*]] = extractelement <2 x half> %h, i64 1
-  ; CHECK-NEXT: [[ie1:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[ee1]])
-  ; CHECK-NEXT: [[ev00:%.*]] = extractvalue { half, i32 } [[ie0]], 0
-  ; CHECK-NEXT: [[ev01:%.*]] = extractvalue { half, i32 } [[ie1]], 0
-  ; CHECK-NEXT: insertelement <2 x half> poison, half [[ev00]], i64 0
-  ; CHECK-NEXT: insertelement <2 x half> %{{.*}}, half [[ev01]], i64 1
+; CHECK-LABEL: define noundef <2 x half> @test_vector_half_frexp_half(
+; CHECK-SAME: <2 x half> noundef [[H:%.*]]) {
+; CHECK-NEXT:    [[H_I0:%.*]] = extractelement <2 x half> [[H]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[H_I0]])
+; CHECK-NEXT:    [[H_I1:%.*]] = extractelement <2 x half> [[H]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[H_I1]])
+; CHECK-NEXT:    [[E0_ELEM0:%.*]] = extractvalue { half, i32 } [[R_I0]], 0
+; CHECK-NEXT:    [[E0_ELEM01:%.*]] = extractvalue { half, i32 } [[R_I1]], 0
+; CHECK-NEXT:    [[E0_UPTO0:%.*]] = insertelement <2 x half> poison, half [[E0_ELEM0]], i64 0
+; CHECK-NEXT:    [[E0:%.*]] = insertelement <2 x half> [[E0_UPTO0]], half [[E0_ELEM01]], i64 1
+; CHECK-NEXT:    ret <2 x half> [[E0]]
+;
   %r =  call { <2 x half>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x half> %h)
   %e0 = extractvalue { <2 x half>, <2 x i32> } %r, 0
   ret <2 x half> %e0
 }
 
-; CHECK-LABEL: @test_vector_half_frexp_int
 define noundef <2 x i32> @test_vector_half_frexp_int(<2 x half> noundef %h) {
-  ; CHECK: [[ee0:%.*]] = extractelement <2 x half> %h, i64 0
-  ; CHECK-NEXT: [[ie0:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[ee0]])
-  ; CHECK-NEXT: [[ee1:%.*]] = extractelement <2 x half> %h, i64 1
-  ; CHECK-NEXT: [[ie1:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[ee1]])
-  ; CHECK-NEXT: [[ev10:%.*]] = extractvalue { half, i32 } [[ie0]], 1
-  ; CHECK-NEXT: [[ev11:%.*]] = extractvalue { half, i32 } [[ie1]], 1
-  ; CHECK-NEXT: insertelement <2 x i32> poison, i32 [[ev10]], i64 0
-  ; CHECK-NEXT: insertelement <2 x i32> %{{.*}}, i32 [[ev11]], i64 1
+; CHECK-LABEL: define noundef <2 x i32> @test_vector_half_frexp_int(
+; CHECK-SAME: <2 x half> noundef [[H:%.*]]) {
+; CHECK-NEXT:    [[H_I0:%.*]] = extractelement <2 x half> [[H]], i64 0
+; CHECK-NEXT:    [[R_I0:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[H_I0]])
+; CHECK-NEXT:    [[H_I1:%.*]] = extractelement <2 x half> [[H]], i64 1
+; CHECK-NEXT:    [[R_I1:%.*]] = call { half, i32 } @llvm.frexp.f16.i32(half [[H_I1]])
+; CHECK-NEXT:    [[E1_ELEM1:%.*]] = extractvalue { half, i32 } [[R_I0]], 1
+; CHECK-NEXT:    [[E1_ELEM11:%.*]] = extractvalue { half, i32 } [[R_I1]], 1
+; CHECK-NEXT:    [[E1_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[E1_ELEM1]], i64 0
+; CHECK-NEXT:    [[E1:%.*]] = insertelement <2 x i32> [[E1_UPTO0]], i32 [[E1_ELEM11]], i64 1
+; CHECK-NEXT:    ret <2 x i32> [[E1]]
+;
   %r =  call { <2 x half>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x half> %h)
   %e1 = extractvalue { <2 x half>, <2 x i32> } %r, 1
   ret <2 x i32> %e1
 }
 
-; CHECK-LABEL: @test_vector_float_frexp_int
 define noundef <2 x float> @test_vector_float_frexp_int(<2 x float> noundef %f) {
-  ; CHECK: [[ee0:%.*]] = extractelement <2 x float> %f, i64 0
-  ; CHECK-NEXT: [[ie0:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[ee0]])
-  ; CHECK-NEXT: [[ee1:%.*]] = extractelement <2 x float> %f, i64 1
-  ; CHECK-NEXT: [[ie1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[ee1]])
-  ; CHECK-NEXT: [[ev00:%.*]] = extractvalue { float, i32 } [[ie0]], 0
-  ; CHECK-NEXT: [[ev01:%.*]] = extractvalue { float, i32 } [[ie1]], 0
-  ; CHECK-NEXT: insertelement <2 x float> poison, float [[ev00]], i64 0
-  ; CHECK-NEXT: insertelement <2 x float> %{{.*}}, float [[ev01]], i64 1
-  ; CHECK-NEXT: extractvalue { float, i32 } [[ie0]], 1
-  ; CHECK-NEXT: extractvalue { float, i32 } [[ie1]], 1
+; CHECK-LABEL: define noundef <2 x float> @test_vector_float_frexp_int(
+; CHECK-SAME: <2 x float> noundef [[F:%.*]]) {
+; CHECK-NEXT:    [[F_I0:%.*]] = extractelement <2 x float> [[F]], i64 0
+; CHECK-NEXT:    [[DOTI0:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[F_I0]])
+; CHECK-NEXT:    [[F_I1:%.*]] = extractelement <2 x float> [[F]], i64 1
+; CHECK-NEXT:    [[DOTI1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[F_I1]])
+; CHECK-NEXT:    [[DOTELEM0:%.*]] = extractvalue { float, i32 } [[DOTI0]], 0
+; CHECK-NEXT:    [[DOTELEM01:%.*]] = extractvalue { float, i32 } [[DOTI1]], 0
+; CHECK-NEXT:    [[DOTUPTO010:%.*]] = insertelement <2 x float> poison, float [[DOTELEM0]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> [[DOTUPTO010]], float [[DOTELEM01]], i64 1
+; CHECK-NEXT:    [[DOTELEM1:%.*]] = extractvalue { float, i32 } [[DOTI0]], 1
+; CHECK-NEXT:    [[DOTELEM12:%.*]] = extractvalue { float, i32 } [[DOTI1]], 1
+; CHECK-NEXT:    ret <2 x float> [[TMP1]]
+;
   %1 =  call { <2 x float>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x float> %f)
   %2 = extractvalue { <2 x float>, <2 x i32> } %1, 0
   %3 = extractvalue { <2 x float>, <2 x i32> } %1, 1
   ret <2 x float> %2
 }
 
-; CHECK-LABEL: @test_vector_double_frexp_int
 define noundef <2 x double> @test_vector_double_frexp_int(<2 x double> noundef %d) {
-  ; CHECK: [[ee0:%.*]] = extractelement <2 x double> %d, i64 0
-  ; CHECK-NEXT: [[ie0:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[ee0]])
-  ; CHECK-NEXT: [[ee1:%.*]] = extractelement <2 x double> %d, i64 1
-  ; CHECK-NEXT: [[ie1:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[ee1]])
-  ; CHECK-NEXT: [[ev00:%.*]] = extractvalue { double, i32 } [[ie0]], 0
-  ; CHECK-NEXT: [[ev01:%.*]] = extractvalue { double, i32 } [[ie1]], 0
-  ; CHECK-NEXT: insertelement <2 x double> poison, double [[ev00]], i64 0
-  ; CHECK-NEXT: insertelement <2 x double> %{{.*}}, double [[ev01]], i64 1
-  ; CHECK-NEXT: extractvalue { double, i32 } [[ie0]], 1
-  ; CHECK-NEXT: extractvalue { double, i32 } [[ie1]], 1
+; CHECK-LABEL: define noundef <2 x double> @test_vector_double_frexp_int(
+; CHECK-SAME: <2 x double> noundef [[D:%.*]]) {
+; CHECK-NEXT:    [[D_I0:%.*]] = extractelement <2 x double> [[D]], i64 0
+; CHECK-NEXT:    [[DOTI0:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[D_I0]])
+; CHECK-NEXT:    [[D_I1:%.*]] = extractelement <2 x double> [[D]], i64 1
+; CHECK-NEXT:    [[DOTI1:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[D_I1]])
+; CHECK-NEXT:    [[DOTELEM0:%.*]] = extractvalue { double, i32 } [[DOTI0]], 0
+; CHECK-NEXT:    [[DOTELEM01:%.*]] = extractvalue { double, i32 } [[DOTI1]], 0
+; CHECK-NEXT:    [[DOTUPTO010:%.*]] = insertelement <2 x double> poison, double [[DOTELEM0]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[DOTUPTO010]], double [[DOTELEM01]], i64 1
+; CHECK-NEXT:    [[DOTELEM1:%.*]] = extractvalue { double, i32 } [[DOTI0]], 1
+; CHECK-NEXT:    [[DOTELEM12:%.*]] = extractvalue { double, i32 } [[DOTI1]], 1
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
   %1 =  call { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double> %d)
   %2 = extractvalue { <2 x double>, <2 x i32> } %1, 0
   %3 = extractvalue { <2 x double>, <2 x i32> } %1, 1

From 9cdfbc6a0c6ad8468fce1ca3b78ca342dbf2d40d Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi@microsoft.com>
Date: Thu, 17 Oct 2024 23:48:01 -0400
Subject: [PATCH 7/9] address Tex's PR comments

---
 llvm/include/llvm/Analysis/VectorUtils.h  |  5 +++++
 llvm/lib/Analysis/VectorUtils.cpp         | 10 ++++++++++
 llvm/lib/Transforms/Scalar/Scalarizer.cpp | 14 ++++++--------
 3 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index e2dd4976f3906..467d5932cacf9 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -154,6 +154,11 @@ bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
 /// the operand at index \p OpdIdx, or on the return type if \p OpdIdx is -1.
 bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx);
 
+/// Identifies if the vector form of the intrinsic that returns a struct is
+/// overloaded at the struct element index \p RetIdx.
+bool isVectorIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID,
+                                                      int RetIdx);
+
 /// Returns intrinsic ID for call.
 /// For the input call instruction it finds mapping intrinsic and returns
 /// its intrinsic ID, in case it does not found it return not_intrinsic.
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 6b5251e0ad34e..37c443011719b 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -152,6 +152,16 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
   }
 }
 
+bool llvm::isVectorIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID,
+                                                            int RetIdx) {
+  switch (ID) {
+  case Intrinsic::frexp:
+    return RetIdx == 0 || RetIdx == 1;
+  default:
+    return RetIdx == 0;
+  }
+}
+
 /// Returns intrinsic ID for call.
 /// For the input call instruction it finds mapping intrinsic and returns
 /// its ID, in case it does not found it return not_intrinsic.
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index f708eeeb29d31..b36423e0fea40 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -700,9 +700,9 @@ bool ScalarizerVisitor::isTriviallyScalarizable(Intrinsic::ID ID) {
 /// element if possible for the intrinsic.
 bool ScalarizerVisitor::splitCall(CallInst &CI) {
   Type *CallType = CI.getType();
-  bool AreAllMatchingVectors = isStructOfMatchingFixedVectors(CallType);
+  bool AreAllVectorsOfMatchingSize = isStructOfMatchingFixedVectors(CallType);
   std::optional<VectorSplit> VS;
-  if (AreAllMatchingVectors)
+  if (AreAllVectorsOfMatchingSize)
     VS = getVectorSplit(CallType->getContainedType(0));
   else
     VS = getVectorSplit(CallType);
@@ -730,19 +730,17 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
   if (isVectorIntrinsicWithOverloadTypeAtArg(ID, -1))
     Tys.push_back(VS->SplitTy);
 
-  if (AreAllMatchingVectors) {
-    Type *PrevType = CallType->getContainedType(0);
+  if (AreAllVectorsOfMatchingSize) {
     for (unsigned I = 1; I < CallType->getNumContainedTypes(); I++) {
-      Type *CurrType = cast<FixedVectorType>(CallType->getContainedType(I));
-      if (PrevType != CurrType) {
-        std::optional<VectorSplit> CurrVS = getVectorSplit(CurrType);
+      if (isVectorIntrinsicWithStructReturnOverloadAtField(ID, I)) {
+        std::optional<VectorSplit> CurrVS = getVectorSplit(
+            cast<FixedVectorType>(CallType->getContainedType(I)));
         // This case does not seem to happen, but it is possible for
         // VectorSplit.NumPacked >= NumElems. If that happens a VectorSplit
         // is not returned and we will bailout of handling this call.
         if (!CurrVS)
           return false;
         Tys.push_back(CurrVS->SplitTy);
-        PrevType = CurrType;
       }
     }
   }

From ee16a4b6d5d6a81728c3f541f146d82fc9c99c00 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi@microsoft.com>
Date: Fri, 18 Oct 2024 16:09:47 -0400
Subject: [PATCH 8/9] address Tex's PR comments

---
 llvm/lib/Transforms/Scalar/Scalarizer.cpp | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index b36423e0fea40..868bfbef1010d 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -732,16 +732,15 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
 
   if (AreAllVectorsOfMatchingSize) {
     for (unsigned I = 1; I < CallType->getNumContainedTypes(); I++) {
-      if (isVectorIntrinsicWithStructReturnOverloadAtField(ID, I)) {
-        std::optional<VectorSplit> CurrVS = getVectorSplit(
-            cast<FixedVectorType>(CallType->getContainedType(I)));
-        // This case does not seem to happen, but it is possible for
-        // VectorSplit.NumPacked >= NumElems. If that happens a VectorSplit
-        // is not returned and we will bailout of handling this call.
-        if (!CurrVS)
-          return false;
+      std::optional<VectorSplit> CurrVS =
+          getVectorSplit(cast<FixedVectorType>(CallType->getContainedType(I)));
+      // This case does not seem to happen, but it is possible for
+      // VectorSplit.NumPacked >= NumElems. If that happens a VectorSplit
+      // is not returned and we will bailout of handling this call.
+      if (!CurrVS)
+        return false;
+      if (isVectorIntrinsicWithStructReturnOverloadAtField(ID, I))
         Tys.push_back(CurrVS->SplitTy);
-      }
     }
   }
   // Assumes that any vector type has the same number of elements as the return

From e00e740a7e5f5f4618787003bdf3002145fbd8bf Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi@microsoft.com>
Date: Fri, 18 Oct 2024 17:18:43 -0400
Subject: [PATCH 9/9] add CurrVS->NumPacked != VS->NumPacked check

---
 llvm/lib/Transforms/Scalar/Scalarizer.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 868bfbef1010d..772f4c6c35dde 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -737,7 +737,11 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
       // This case does not seem to happen, but it is possible for
       // VectorSplit.NumPacked >= NumElems. If that happens a VectorSplit
       // is not returned and we will bailout of handling this call.
-      if (!CurrVS)
+      // The secondary bailout case is if NumPacked does not match.
+      // This can happen if ScalarizeMinBits is not set to the default.
+      // With certain ScalarizeMinBits values, intrinsics like frexp will
+      // only scalarize when the struct elements have the same bitness.
+      if (!CurrVS || CurrVS->NumPacked != VS->NumPacked)
         return false;
       if (isVectorIntrinsicWithStructReturnOverloadAtField(ID, I))
         Tys.push_back(CurrVS->SplitTy);