From 629302ae46049b38352d3e77b5c6a2eb1ff7ddc5 Mon Sep 17 00:00:00 2001 From: Janek van Oirschot Date: Tue, 4 Jun 2024 13:55:46 +0100 Subject: [PATCH 1/3] [AMDGPU] Support SIProgramInfo MCExpr for comments and remarks --- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 121 ++++++++++++-------- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h | 7 ++ 2 files changed, 81 insertions(+), 47 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index cad4a3430327b..7ac2f9db5e046 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -400,6 +400,37 @@ void AMDGPUAsmPrinter::emitCommonFunctionComments( false); } +std::string AMDGPUAsmPrinter::getMCExprStr(const MCExpr *Value) { + std::string Str; + raw_string_ostream OSS(Str); + int64_t IVal; + if (Value->evaluateAsAbsolute(IVal)) { + OSS << static_cast(IVal); + } else { + Value->print(OSS, MAI); + } + return Str; +} + +void AMDGPUAsmPrinter::emitCommonFunctionComments( + const MCExpr *NumVGPR, std::optional NumAGPR, + const MCExpr *TotalNumVGPR, const MCExpr *NumSGPR, + const MCExpr *ScratchSize, uint64_t CodeSize, + const AMDGPUMachineFunction *MFI) { + OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false); + OutStreamer->emitRawComment(" NumSgprs: " + getMCExprStr(NumSGPR), false); + OutStreamer->emitRawComment(" NumVgprs: " + getMCExprStr(NumVGPR), false); + if (NumAGPR) { + OutStreamer->emitRawComment(" NumAgprs: " + getMCExprStr(*NumAGPR), false); + OutStreamer->emitRawComment(" TotalNumVgprs: " + getMCExprStr(TotalNumVGPR), + false); + } + OutStreamer->emitRawComment(" ScratchSize: " + getMCExprStr(ScratchSize), + false); + OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()), + false); +} + uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties( const MachineFunction &MF) const { const SIMachineFunctionInfo &MFI = *MF.getInfo(); @@ -554,13 +585,11 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { OutStreamer->emitRawComment(" Kernel info:", false); emitCommonFunctionComments( - getMCExprValue(CurrentProgramInfo.NumArchVGPR, Ctx), - STM.hasMAIInsts() ? getMCExprValue(CurrentProgramInfo.NumAccVGPR, Ctx) - : std::optional(), - getMCExprValue(CurrentProgramInfo.NumVGPR, Ctx), - getMCExprValue(CurrentProgramInfo.NumSGPR, Ctx), - getMCExprValue(CurrentProgramInfo.ScratchSize, Ctx), - getFunctionCodeSize(MF), MFI); + CurrentProgramInfo.NumArchVGPR, + STM.hasMAIInsts() ? CurrentProgramInfo.NumAccVGPR + : std::optional(), + CurrentProgramInfo.NumVGPR, CurrentProgramInfo.NumSGPR, + CurrentProgramInfo.ScratchSize, getFunctionCodeSize(MF), MFI); OutStreamer->emitRawComment( " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false); @@ -571,43 +600,38 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { " bytes/workgroup (compile time only)", false); OutStreamer->emitRawComment( - " SGPRBlocks: " + - Twine(getMCExprValue(CurrentProgramInfo.SGPRBlocks, Ctx)), - false); + " SGPRBlocks: " + getMCExprStr(CurrentProgramInfo.SGPRBlocks), false); + OutStreamer->emitRawComment( - " VGPRBlocks: " + - Twine(getMCExprValue(CurrentProgramInfo.VGPRBlocks, Ctx)), - false); + " VGPRBlocks: " + getMCExprStr(CurrentProgramInfo.VGPRBlocks), false); OutStreamer->emitRawComment( " NumSGPRsForWavesPerEU: " + - Twine( - getMCExprValue(CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx)), + getMCExprStr(CurrentProgramInfo.NumSGPRsForWavesPerEU), false); OutStreamer->emitRawComment( " NumVGPRsForWavesPerEU: " + - Twine( - getMCExprValue(CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx)), + getMCExprStr(CurrentProgramInfo.NumVGPRsForWavesPerEU), false); - if (STM.hasGFX90AInsts()) + if (STM.hasGFX90AInsts()) { + const MCExpr *AdjustedAccum = MCBinaryExpr::createAdd( + CurrentProgramInfo.AccumOffset, MCConstantExpr::create(1, Ctx), Ctx); + AdjustedAccum = MCBinaryExpr::createMul( + AdjustedAccum, MCConstantExpr::create(4, Ctx), Ctx); OutStreamer->emitRawComment( - " AccumOffset: " + - Twine((getMCExprValue(CurrentProgramInfo.AccumOffset, Ctx) + 1) * - 4), - false); + " AccumOffset: " + getMCExprStr(AdjustedAccum), false); + } OutStreamer->emitRawComment( - " Occupancy: " + - Twine(getMCExprValue(CurrentProgramInfo.Occupancy, Ctx)), - false); + " Occupancy: " + getMCExprStr(CurrentProgramInfo.Occupancy), false); OutStreamer->emitRawComment( " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false); OutStreamer->emitRawComment( " COMPUTE_PGM_RSRC2:SCRATCH_EN: " + - Twine(getMCExprValue(CurrentProgramInfo.ScratchEnable, Ctx)), + getMCExprStr(CurrentProgramInfo.ScratchEnable), false); OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " + Twine(CurrentProgramInfo.UserSGPR), @@ -628,20 +652,25 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { Twine(CurrentProgramInfo.TIdIGCompCount), false); + int64_t PGMRSrc3; assert(STM.hasGFX90AInsts() || - getMCExprValue(CurrentProgramInfo.ComputePGMRSrc3GFX90A, Ctx) == 0); + (CurrentProgramInfo.ComputePGMRSrc3GFX90A->evaluateAsAbsolute( + PGMRSrc3) && + static_cast(PGMRSrc3) == 0)); if (STM.hasGFX90AInsts()) { OutStreamer->emitRawComment( " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " + - Twine((AMDHSA_BITS_GET( - getMCExprValue(CurrentProgramInfo.ComputePGMRSrc3GFX90A, Ctx), - amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET))), + getMCExprStr(MCKernelDescriptor::bits_get( + CurrentProgramInfo.ComputePGMRSrc3GFX90A, + amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT, + amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, Ctx)), false); OutStreamer->emitRawComment( " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " + - Twine((AMDHSA_BITS_GET( - getMCExprValue(CurrentProgramInfo.ComputePGMRSrc3GFX90A, Ctx), - amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT))), + getMCExprStr(MCKernelDescriptor::bits_get( + CurrentProgramInfo.ComputePGMRSrc3GFX90A, + amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT, + amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Ctx)), false); } } @@ -1463,28 +1492,26 @@ void AMDGPUAsmPrinter::emitResourceUsageRemarks( // remarks to simulate newlines. If and when clang does accept newlines, this // formatting should be aggregated into one remark with newlines to avoid // printing multiple diagnostic location and diag opts. - MCContext &MCCtx = MF.getContext(); EmitResourceUsageRemark("FunctionName", "Function Name", MF.getFunction().getName()); EmitResourceUsageRemark("NumSGPR", "SGPRs", - getMCExprValue(CurrentProgramInfo.NumSGPR, MCCtx)); - EmitResourceUsageRemark( - "NumVGPR", "VGPRs", - getMCExprValue(CurrentProgramInfo.NumArchVGPR, MCCtx)); + getMCExprStr(CurrentProgramInfo.NumSGPR)); + EmitResourceUsageRemark("NumVGPR", "VGPRs", + getMCExprStr(CurrentProgramInfo.NumArchVGPR)); if (hasMAIInsts) { - EmitResourceUsageRemark( - "NumAGPR", "AGPRs", - getMCExprValue(CurrentProgramInfo.NumAccVGPR, MCCtx)); + EmitResourceUsageRemark("NumAGPR", "AGPRs", + getMCExprStr(CurrentProgramInfo.NumAccVGPR)); } - EmitResourceUsageRemark( - "ScratchSize", "ScratchSize [bytes/lane]", - getMCExprValue(CurrentProgramInfo.ScratchSize, MCCtx)); + EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]", + getMCExprStr(CurrentProgramInfo.ScratchSize)); + int64_t DynStack; + bool DynStackEvaluatable = + CurrentProgramInfo.DynamicCallStack->evaluateAsAbsolute(DynStack); StringRef DynamicStackStr = - getMCExprValue(CurrentProgramInfo.DynamicCallStack, MCCtx) ? "True" - : "False"; + DynStackEvaluatable && DynStack ? "True" : "False"; EmitResourceUsageRemark("DynamicStack", "Dynamic Stack", DynamicStackStr); EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]", - getMCExprValue(CurrentProgramInfo.Occupancy, MCCtx)); + getMCExprStr(CurrentProgramInfo.Occupancy)); EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill", CurrentProgramInfo.SGPRSpill); EmitResourceUsageRemark("VGPRSpill", "VGPRs Spill", diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index 87156f27fc6c5..2a3a39029dd2a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -65,6 +65,12 @@ class AMDGPUAsmPrinter final : public AsmPrinter { uint32_t TotalNumVGPR, uint32_t NumSGPR, uint64_t ScratchSize, uint64_t CodeSize, const AMDGPUMachineFunction *MFI); + void emitCommonFunctionComments(const MCExpr *NumVGPR, + std::optional NumAGPR, + const MCExpr *TotalNumVGPR, + const MCExpr *NumSGPR, + const MCExpr *ScratchSize, uint64_t CodeSize, + const AMDGPUMachineFunction *MFI); void emitResourceUsageRemarks(const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo, bool isModuleEntryFunction, bool hasMAIInsts); @@ -79,6 +85,7 @@ class AMDGPUAsmPrinter final : public AsmPrinter { void initTargetStreamer(Module &M); static uint64_t getMCExprValue(const MCExpr *Value, MCContext &Ctx); + std::string getMCExprStr(const MCExpr *Value); public: explicit AMDGPUAsmPrinter(TargetMachine &TM, From 7c6ab8fb39f34bb344c4f9130c6f89123c460f05 Mon Sep 17 00:00:00 2001 From: Janek van Oirschot Date: Fri, 7 Jun 2024 13:40:25 +0100 Subject: [PATCH 2/3] SmallString + StringRef, check for nullptr instead of optional --- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 20 +++++++++----------- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h | 5 ++--- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 7ac2f9db5e046..fe0789aee35ea 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -400,28 +400,27 @@ void AMDGPUAsmPrinter::emitCommonFunctionComments( false); } -std::string AMDGPUAsmPrinter::getMCExprStr(const MCExpr *Value) { - std::string Str; - raw_string_ostream OSS(Str); +StringRef AMDGPUAsmPrinter::getMCExprStr(const MCExpr *Value) { + SmallString<128> Str; + raw_svector_ostream OSS(Str); int64_t IVal; if (Value->evaluateAsAbsolute(IVal)) { OSS << static_cast(IVal); } else { Value->print(OSS, MAI); } - return Str; + return OSS.str(); } void AMDGPUAsmPrinter::emitCommonFunctionComments( - const MCExpr *NumVGPR, std::optional NumAGPR, - const MCExpr *TotalNumVGPR, const MCExpr *NumSGPR, - const MCExpr *ScratchSize, uint64_t CodeSize, + const MCExpr *NumVGPR, const MCExpr *NumAGPR, const MCExpr *TotalNumVGPR, + const MCExpr *NumSGPR, const MCExpr *ScratchSize, uint64_t CodeSize, const AMDGPUMachineFunction *MFI) { OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false); OutStreamer->emitRawComment(" NumSgprs: " + getMCExprStr(NumSGPR), false); OutStreamer->emitRawComment(" NumVgprs: " + getMCExprStr(NumVGPR), false); - if (NumAGPR) { - OutStreamer->emitRawComment(" NumAgprs: " + getMCExprStr(*NumAGPR), false); + if (NumAGPR && TotalNumVGPR) { + OutStreamer->emitRawComment(" NumAgprs: " + getMCExprStr(NumAGPR), false); OutStreamer->emitRawComment(" TotalNumVgprs: " + getMCExprStr(TotalNumVGPR), false); } @@ -586,8 +585,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { OutStreamer->emitRawComment(" Kernel info:", false); emitCommonFunctionComments( CurrentProgramInfo.NumArchVGPR, - STM.hasMAIInsts() ? CurrentProgramInfo.NumAccVGPR - : std::optional(), + STM.hasMAIInsts() ? CurrentProgramInfo.NumAccVGPR : nullptr, CurrentProgramInfo.NumVGPR, CurrentProgramInfo.NumSGPR, CurrentProgramInfo.ScratchSize, getFunctionCodeSize(MF), MFI); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index 2a3a39029dd2a..7534b470d6a3e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -65,8 +65,7 @@ class AMDGPUAsmPrinter final : public AsmPrinter { uint32_t TotalNumVGPR, uint32_t NumSGPR, uint64_t ScratchSize, uint64_t CodeSize, const AMDGPUMachineFunction *MFI); - void emitCommonFunctionComments(const MCExpr *NumVGPR, - std::optional NumAGPR, + void emitCommonFunctionComments(const MCExpr *NumVGPR, const MCExpr *NumAGPR, const MCExpr *TotalNumVGPR, const MCExpr *NumSGPR, const MCExpr *ScratchSize, uint64_t CodeSize, @@ -85,7 +84,7 @@ class AMDGPUAsmPrinter final : public AsmPrinter { void initTargetStreamer(Module &M); static uint64_t getMCExprValue(const MCExpr *Value, MCContext &Ctx); - std::string getMCExprStr(const MCExpr *Value); + StringRef getMCExprStr(const MCExpr *Value); public: explicit AMDGPUAsmPrinter(TargetMachine &TM, From b804bdfef73ee5242f0d9bc5f8794d601f720453 Mon Sep 17 00:00:00 2001 From: Janek van Oirschot Date: Fri, 7 Jun 2024 16:20:44 +0100 Subject: [PATCH 3/3] Feedback --- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 4 ++-- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index fe0789aee35ea..2541c3ea6a1fa 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -400,7 +400,7 @@ void AMDGPUAsmPrinter::emitCommonFunctionComments( false); } -StringRef AMDGPUAsmPrinter::getMCExprStr(const MCExpr *Value) { +SmallString<128> AMDGPUAsmPrinter::getMCExprStr(const MCExpr *Value) { SmallString<128> Str; raw_svector_ostream OSS(Str); int64_t IVal; @@ -409,7 +409,7 @@ StringRef AMDGPUAsmPrinter::getMCExprStr(const MCExpr *Value) { } else { Value->print(OSS, MAI); } - return OSS.str(); + return Str; } void AMDGPUAsmPrinter::emitCommonFunctionComments( diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index 7534b470d6a3e..162cd40687c7e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -84,7 +84,7 @@ class AMDGPUAsmPrinter final : public AsmPrinter { void initTargetStreamer(Module &M); static uint64_t getMCExprValue(const MCExpr *Value, MCContext &Ctx); - StringRef getMCExprStr(const MCExpr *Value); + SmallString<128> getMCExprStr(const MCExpr *Value); public: explicit AMDGPUAsmPrinter(TargetMachine &TM,