From f94083ddfdf101d825dd37df582bf1a17b3ab745 Mon Sep 17 00:00:00 2001
From: Harrison Hao
Date: Mon, 23 Jun 2025 15:45:17 +0800
Subject: [PATCH] [AMDGPU][NFC] Refactor D16 folding for image samples with multiple ExtractElement+FPTrunc chains

---
Reviewer note: this patch text was recovered from a whitespace-collapsed copy
in which every non-empty "<...>" span had been stripped. Line breaks and
indentation were rebuilt from the +/- markers and agree with the hunk counts.
Template arguments were restored where the surrounding code determines them
(std::optional<Instruction *>, dyn_cast<ExtractElementInst>,
dyn_cast<FPTruncInst>, cast<VectorType>, the std::pair element types).
SmallVector inline capacities, other than the surviving ", 4", and the
author's e-mail address could not be recovered. Verify the context and '-'
lines against the tree before applying.

 .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp     | 128 ++++++++++--------
 1 file changed, 69 insertions(+), 59 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 5477c5eae9392..171d44b5ec329 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -34,6 +34,12 @@ struct AMDGPUImageDMaskIntrinsic {
   unsigned Intr;
 };
 
+struct D16Candidate {
+  SmallVector<Instruction *> InstsToErase;
+  Instruction *Replacee = nullptr;
+  Value *Index = nullptr;
+};
+
 #define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
 #include "InstCombineTables.inc"
 
@@ -150,6 +156,67 @@ static std::optional<Instruction *> modifyIntrinsicCall(
   return RetValue;
 }
 
+/// Attempts to fold an image sample whose users are ExtractElement + FPTrunc
+/// chains into a D16-returning version.
+static std::optional<Instruction *>
+modifyImageIntrinsicForD16(IntrinsicInst &II,
+                           const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
+                           InstCombiner &IC) {
+  SmallVector<D16Candidate> Candidates;
+
+  // Collect all (ExtractElement, FPTrunc) pairs; abort on the first mismatch
+  for (User *U : II.users()) {
+    auto *Ext = dyn_cast<ExtractElementInst>(U);
+    if (!Ext || !Ext->hasOneUse())
+      return std::nullopt;
+
+    auto *Tr = dyn_cast<FPTruncInst>(*Ext->user_begin());
+    if (!Tr || !Tr->getType()->getScalarType()->isHalfTy())
+      return std::nullopt;
+
+    auto &Cand = Candidates.emplace_back();
+    Cand.InstsToErase = {Tr, Ext};
+    Cand.Replacee = Tr;
+    Cand.Index = Ext->getIndexOperand();
+  }
+
+  if (Candidates.empty())
+    return std::nullopt;
+
+  // Build the new half-vector return type
+  auto *VecTy = cast<VectorType>(II.getType());
+  Type *HalfVecTy = VecTy->getWithNewType(Type::getHalfTy(II.getContext()));
+
+  // Obtain the original image sample intrinsic's signature
+  // and replace its return type with the half-vector for D16 folding
+  SmallVector<Type *> SigTys;
+  Intrinsic::getIntrinsicSignature(II.getCalledFunction(), SigTys);
+  SigTys[0] = HalfVecTy;
+
+  Function *HalfDecl = Intrinsic::getOrInsertDeclaration(
+      II.getModule(), ImageDimIntr->Intr, SigTys);
+
+  II.mutateType(HalfVecTy);
+  II.setCalledFunction(HalfDecl);
+
+  // Replace each chain with a single ExtractElement from the new D16 image
+  IRBuilder<> B(II.getContext());
+  for (auto &[Insts, Replacee, Idx] : Candidates) {
+    B.SetInsertPoint(Replacee);
+    auto *HalfExtract = B.CreateExtractElement(&II, Idx);
+    HalfExtract->takeName(Replacee);
+    Replacee->replaceAllUsesWith(HalfExtract);
+  }
+
+  // Erase the old instructions
+  for (auto &[Insts, Replacee, Idx] : Candidates) {
+    for (auto *I : Insts)
+      IC.eraseInstFromFunction(*I);
+  }
+
+  return &II;
+}
+
 static std::optional<Instruction *>
 simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
                              const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
@@ -249,65 +316,8 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
         }
       }
 
-      // Only perform D16 folding if every user of the image sample is
-      // an ExtractElementInst immediately followed by an FPTrunc to half.
-      SmallVector<std::pair<ExtractElementInst *, FPTruncInst *>, 4>
-          ExtractTruncPairs;
-      bool AllHalfExtracts = true;
-
-      for (User *U : II.users()) {
-        auto *Ext = dyn_cast<ExtractElementInst>(U);
-        if (!Ext || !Ext->hasOneUse()) {
-          AllHalfExtracts = false;
-          break;
-        }
-
-        auto *Tr = dyn_cast<FPTruncInst>(*Ext->user_begin());
-        if (!Tr || !Tr->getType()->isHalfTy()) {
-          AllHalfExtracts = false;
-          break;
-        }
-
-        ExtractTruncPairs.emplace_back(Ext, Tr);
-      }
-
-      if (!ExtractTruncPairs.empty() && AllHalfExtracts) {
-        auto *VecTy = cast<VectorType>(II.getType());
-        Type *HalfVecTy =
-            VecTy->getWithNewType(Type::getHalfTy(II.getContext()));
-
-        // Obtain the original image sample intrinsic's signature
-        // and replace its return type with the half-vector for D16 folding
-        SmallVector<Type *> SigTys;
-        Intrinsic::getIntrinsicSignature(II.getCalledFunction(), SigTys);
-        SigTys[0] = HalfVecTy;
-
-        Module *M = II.getModule();
-        Function *HalfDecl =
-            Intrinsic::getOrInsertDeclaration(M, ImageDimIntr->Intr, SigTys);
-
-        II.mutateType(HalfVecTy);
-        II.setCalledFunction(HalfDecl);
-
-        IRBuilder<> Builder(II.getContext());
-        for (auto &[Ext, Tr] : ExtractTruncPairs) {
-          Value *Idx = Ext->getIndexOperand();
-
-          Builder.SetInsertPoint(Tr);
-
-          Value *HalfExtract = Builder.CreateExtractElement(&II, Idx);
-          HalfExtract->takeName(Tr);
-
-          Tr->replaceAllUsesWith(HalfExtract);
-        }
-
-        for (auto &[Ext, Tr] : ExtractTruncPairs) {
-          IC.eraseInstFromFunction(*Tr);
-          IC.eraseInstFromFunction(*Ext);
-        }
-
-        return &II;
-      }
+      if (auto FoldedII = modifyImageIntrinsicForD16(II, ImageDimIntr, IC))
+        return *FoldedII;
     }
   }
 