diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index de93fc390a02d..68c86612c1885 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -108,3 +108,8 @@ sycl/doc/extensions/ExplicitSIMD/ @kbobrovs @v-klochkov @kychendev llvm/lib/Transforms/Instrumentation/SPIRITTAnnotations.cpp @MrSidims @vzakhari llvm/include/llvm/Transforms/Instrumentation/SPIRITTAnnotations.h @MrSidims @vzakhari llvm/test/Transforms/SPIRITTAnnotations/* @MrSidims @vzakhari + +# Generic address space support for printf +llvm/lib/SYCLLowerIR/MutatePrintfAddrspace.cpp @AGindinson @AlexeySachkov @mlychkov +llvm/include/llvm/SYCLLowerIR/MutatePrintfAddrspace.h @AGindinson @AlexeySachkov @mlychkov +llvm/test/SYCLLowerIR/printf_addrspace/* @AGindinson @AlexeySachkov @mlychkov diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index e3e21c4efa484..c02e8ba1dbb20 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -44,6 +44,7 @@ #include "llvm/Passes/StandardInstrumentations.h" #include "llvm/SYCLLowerIR/ESIMDVerifier.h" #include "llvm/SYCLLowerIR/LowerWGLocalMemory.h" +#include "llvm/SYCLLowerIR/MutatePrintfAddrspace.h" #include "llvm/Support/BuryPointer.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/MemoryBuffer.h" @@ -1053,6 +1054,7 @@ void EmitAssemblyHelper::EmitAssemblyWithLegacyPassManager( if (CodeGenOpts.DisableLLVMPasses) PerModulePasses.add(createAlwaysInlinerLegacyPass(false)); PerModulePasses.add(createSYCLLowerWGLocalMemoryLegacyPass()); + PerModulePasses.add(createSYCLMutatePrintfAddrspaceLegacyPass()); } switch (Action) { @@ -1470,6 +1472,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline( MPM.addPass(ModuleMemProfilerPass()); } } + if (LangOpts.SYCLIsDevice) { + MPM.addPass(SYCLMutatePrintfAddrspacePass()); + } // Add a verifier pass if requested. We don't have to do this if the action // requires code generation because there will already be a verifier pass in diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 24d3c1715ec5b..26314f0580305 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -436,6 +436,7 @@ void initializeStripSymbolsPass(PassRegistry&); void initializeStructurizeCFGLegacyPassPass(PassRegistry &); void initializeSYCLLowerWGScopeLegacyPassPass(PassRegistry &); void initializeSYCLLowerESIMDLegacyPassPass(PassRegistry &); +void initializeSYCLMutatePrintfAddrspaceLegacyPassPass(PassRegistry &); void initializeSPIRITTAnnotationsLegacyPassPass(PassRegistry &); void initializeESIMDLowerLoadStorePass(PassRegistry &); void initializeESIMDLowerVecArgLegacyPassPass(PassRegistry &); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index eb146d816a1b2..c3e9ae037ae69 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -38,6 +38,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/SYCLLowerIR/ESIMDVerifier.h" +#include "llvm/SYCLLowerIR/MutatePrintfAddrspace.h" #include "llvm/Support/Valgrind.h" #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" #include "llvm/Transforms/IPO.h" diff --git a/llvm/include/llvm/SYCLLowerIR/MutatePrintfAddrspace.h b/llvm/include/llvm/SYCLLowerIR/MutatePrintfAddrspace.h new file mode 100644 index 0000000000000..66a244ba554ed --- /dev/null +++ b/llvm/include/llvm/SYCLLowerIR/MutatePrintfAddrspace.h @@ -0,0 +1,32 @@ +//===------- MutatePrintfAddrspace.h - SYCL printf AS mutation Pass -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A transformation pass which detects non-constant address space +// literals usage for the first argument of SYCL experimental printf +// function, and moves the string literal to constant address +// space. This a temporary solution for printf's support of generic +// address space literals; the pass should be dropped once SYCL device +// backends learn to handle the generic address-spaced argument properly. +//===----------------------------------------------------------------------===// + +#pragma once + +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class SYCLMutatePrintfAddrspacePass + : public PassInfoMixin { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); +}; + +ModulePass *createSYCLMutatePrintfAddrspaceLegacyPass(); + +} // namespace llvm diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index ac70ff7b14d60..0267bd8f8ad6a 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -78,6 +78,7 @@ #include "llvm/SYCLLowerIR/ESIMDVerifier.h" #include "llvm/SYCLLowerIR/LowerESIMD.h" #include "llvm/SYCLLowerIR/LowerWGScope.h" +#include "llvm/SYCLLowerIR/MutatePrintfAddrspace.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 8d13afe9d7f9c..f3eee93733323 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -119,6 +119,7 @@ MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass()) MODULE_PASS("LowerESIMD", SYCLLowerESIMDPass()) MODULE_PASS("ESIMDLowerVecArg", ESIMDLowerVecArgPass()) MODULE_PASS("esimd-verifier", ESIMDVerifierPass()) +MODULE_PASS("SYCLMutatePrintfAddrspace", SYCLMutatePrintfAddrspacePass()) MODULE_PASS("SPIRITTAnnotations", SPIRITTAnnotationsPass()) MODULE_PASS("deadargelim-sycl", DeadArgumentEliminationSYCLPass()) #undef MODULE_PASS diff --git a/llvm/lib/SYCLLowerIR/CMakeLists.txt b/llvm/lib/SYCLLowerIR/CMakeLists.txt index c68f72b1b33fc..520f53e4ffa4e 100644 --- a/llvm/lib/SYCLLowerIR/CMakeLists.txt +++ b/llvm/lib/SYCLLowerIR/CMakeLists.txt @@ -54,6 +54,7 @@ add_llvm_component_library(LLVMSYCLLowerIR LowerESIMDVecArg.cpp LowerWGLocalMemory.cpp ESIMDVerifier.cpp + MutatePrintfAddrspace.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/SYCLLowerIR diff --git a/llvm/lib/SYCLLowerIR/MutatePrintfAddrspace.cpp b/llvm/lib/SYCLLowerIR/MutatePrintfAddrspace.cpp new file mode 100644 index 0000000000000..3cc9e40befa99 --- /dev/null +++ b/llvm/lib/SYCLLowerIR/MutatePrintfAddrspace.cpp @@ -0,0 +1,253 @@ +//===------ MutatePrintfAddrspace.cpp - SYCL printf AS mutation Pass ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A transformation pass which detects non-constant address space +// literals usage for the first argument of SYCL experimental printf +// function, and moves the string literal to constant address +// space. This a temporary solution for printf's support of generic +// address space literals; the pass should be dropped once SYCL device +// backends learn to handle the generic address-spaced argument properly. +//===----------------------------------------------------------------------===// + +#include "llvm/SYCLLowerIR/MutatePrintfAddrspace.h" + +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/InitializePasses.h" + +using namespace llvm; + +namespace { +// Wrapper for the pass to make it working with the old pass manager +class SYCLMutatePrintfAddrspaceLegacyPass : public ModulePass { +public: + static char ID; + SYCLMutatePrintfAddrspaceLegacyPass() : ModulePass(ID) { + initializeSYCLMutatePrintfAddrspaceLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + // run the SYCLMutatePrintfAddrspace pass on the specified module + bool runOnModule(Module &M) override { + ModuleAnalysisManager MAM; + auto PA = Impl.run(M, MAM); + return !PA.areAllPreserved(); + } + +private: + SYCLMutatePrintfAddrspacePass Impl; +}; + +static constexpr unsigned ConstantAddrspaceID = 2; +// If the variadic version gets picked during FE compilation, we'll only have +// 1 function to replace. However, unique declarations are emitted for each +// of the non-variadic (variadic template) calls. +using FunctionVecTy = SmallVector; + +Function *getCASPrintfFunction(Module &M, PointerType *CASLiteralType); +size_t setFuncCallsOntoCASPrintf(Function *F, Function *CASPrintfFunc, + FunctionVecTy &FunctionsToDrop); +} // namespace + +char SYCLMutatePrintfAddrspaceLegacyPass::ID = 0; +INITIALIZE_PASS(SYCLMutatePrintfAddrspaceLegacyPass, + "SYCLMutatePrintfAddrspace", + "Move SYCL printf literal arguments to constant address space", + false, false) + +// Public interface to the SYCLMutatePrintfAddrspacePass. +ModulePass *llvm::createSYCLMutatePrintfAddrspaceLegacyPass() { + return new SYCLMutatePrintfAddrspaceLegacyPass(); +} + +PreservedAnalyses +SYCLMutatePrintfAddrspacePass::run(Module &M, ModuleAnalysisManager &MAM) { + Type *Int8Type = Type::getInt8Ty(M.getContext()); + auto *CASLiteralType = PointerType::get(Int8Type, ConstantAddrspaceID); + Function *CASPrintfFunc = getCASPrintfFunction(M, CASLiteralType); + + FunctionVecTy FunctionsToDrop; + bool ModuleChanged = false; + for (Function &F : M) { + if (!F.isDeclaration()) + continue; + if (!F.getName().startswith("_Z18__spirv_ocl_printf")) + continue; + if (F.getArg(0)->getType() == CASLiteralType) + // No need to replace the literal type and its printf users + continue; + ModuleChanged |= + setFuncCallsOntoCASPrintf(&F, CASPrintfFunc, FunctionsToDrop); + } + for (Function *F : FunctionsToDrop) + F->eraseFromParent(); + + return ModuleChanged ? PreservedAnalyses::all() : PreservedAnalyses::none(); +} + +/// Helper implementations +namespace { + +/// Get the constant addrspace version of the __spirv_ocl_printf declaration, +/// or generate it if the IR module doesn't have it yet. Also make it +/// variadic so that it could replace all non-variadic generic AS versions. +Function *getCASPrintfFunction(Module &M, PointerType *CASLiteralType) { + Type *Int32Type = Type::getInt32Ty(M.getContext()); + auto *CASPrintfFuncTy = FunctionType::get(Int32Type, CASLiteralType, + /*isVarArg=*/true); + // extern int __spirv_ocl_printf( + // const __attribute__((opencl_constant)) char *Format, ...) + FunctionCallee CASPrintfFuncCallee = + M.getOrInsertFunction("_Z18__spirv_ocl_printfPU3AS2Kcz", CASPrintfFuncTy); + auto *CASPrintfFunc = cast(CASPrintfFuncCallee.getCallee()); + CASPrintfFunc->setCallingConv(CallingConv::SPIR_FUNC); + CASPrintfFunc->setDSOLocal(true); + return CASPrintfFunc; +} + +/// Generate the constant addrspace version of the generic addrspace-residing +/// global string. If one exists already, get it from the module. +Constant *getCASLiteral(GlobalVariable *GenericASLiteral) { + Module *M = GenericASLiteral->getParent(); + // Appending the stable suffix ensures that only one CAS copy is made for each + // string. In case of the matching name, llvm::Module APIs will ensure that + // the existing global is returned. + std::string CASLiteralName = GenericASLiteral->getName().str() + "._AS2"; + if (GlobalVariable *ExistingGlobal = + M->getGlobalVariable(CASLiteralName, /*AllowInternal=*/true)) + return ExistingGlobal; + + StringRef LiteralValue; + getConstantStringInfo(GenericASLiteral, LiteralValue); + IRBuilder<> Builder(M->getContext()); + GlobalVariable *Res = Builder.CreateGlobalString(LiteralValue, CASLiteralName, + ConstantAddrspaceID, M); + Res->setLinkage(GlobalValue::LinkageTypes::InternalLinkage); + Res->setUnnamedAddr(GlobalValue::UnnamedAddr::None); + return Res; +} + +/// Encapsulates the update of CallInst's literal argument. +void setCallArgOntoCASPrintf(CallInst *CI, Constant *CASArg, + Function *CASPrintfFunc) { + CI->setCalledFunction(CASPrintfFunc); + auto *Const = CASArg; + // In case there's a misalignment between the updated function type and + // the constant literal type, create a constant pointer cast so as to + // duck module verifier complaints. + Type *ParamType = CASPrintfFunc->getFunctionType()->getParamType(0); + if (Const->getType() != ParamType) + Const = ConstantExpr::getPointerCast(Const, ParamType); + CI->setArgOperand(0, Const); +} + +/// The function's effect is similar to V->stripPointerCastsAndAliases(), but +/// also strips load/store aliases. +/// NB: This function can only operate on simple CFG, where load/store pairs +/// leading to the global variable are merely a consequence of low optimization +/// level. Re-using it for complex CFG with arbitrary memory paths is definitely +/// not recommended. +Value *stripToMemorySource(Value *V) { + Value *MemoryAccess = V; + if (auto *LI = dyn_cast(MemoryAccess)) { + Value *LoadSource = LI->getPointerOperand(); + auto *Store = cast(*llvm::find_if( + LoadSource->users(), [](User *U) { return isa(U); })); + MemoryAccess = Store->getValueOperand(); + } + return MemoryAccess->stripPointerCastsAndAliases(); +} + +void emitError(Function *PrintfInstance, CallInst *PrintfCall, + StringRef RecommendationToUser = "") { + std::string ErrorMsg = + std::string("experimental::printf requires format string to reside " + "in constant " + "address space. The compiler wasn't able to " + "automatically convert " + "your format string into constant address space when " + "processing builtin ") + + PrintfInstance->getName().str() + " called in function " + + PrintfCall->getFunction()->getName().str() + ".\n" + + RecommendationToUser.str(); + PrintfInstance->getContext().emitError(PrintfCall, ErrorMsg); +} + +/// This routine goes over CallInst users of F, resetting the called function +/// to CASPrintfFunc and generating/retracting constant addrspace format +/// strings to use as operands of the mutated calls. +size_t setFuncCallsOntoCASPrintf(Function *F, Function *CASPrintfFunc, + FunctionVecTy &FunctionsToDrop) { + size_t MutatedCallsCount = 0; + SmallVector, 16> CallsToMutate; + for (User *U : F->users()) { + if (!isa(U)) + continue; + auto *CI = cast(U); + + // This key algorithm reaches the global string used as an argument to a + // __spirv_ocl_printf call. It then generates a constant AS copy of that + // global (or gets an existing one). For the return value, the call + // instruction is paired with its future constant addrspace string + // argument. + Value *Stripped = stripToMemorySource(CI->getArgOperand(0)); + if (auto *Literal = dyn_cast(Stripped)) + CallsToMutate.emplace_back(CI, getCASLiteral(Literal)); + else if (auto *Arg = dyn_cast(Stripped)) { + // The global literal is passed to __spirv_ocl_printf via a wrapper + // function argument. We'll update the wrapper calls to use the builtin + // function directly instead. + Function *WrapperFunc = Arg->getParent(); + std::string BadWrapperErrorMsg = + "Consider simplifying the code by " + "passing format strings directly into experimental::printf calls, " + "avoiding indirection via wrapper function arguments."; + if (!WrapperFunc->getName().contains("6oneapi12experimental6printf")) { + emitError(WrapperFunc, CI, BadWrapperErrorMsg); + return 0; + } + for (User *WrapperU : WrapperFunc->users()) { + auto *WrapperCI = cast(WrapperU); + Value *StrippedArg = stripToMemorySource(WrapperCI->getArgOperand(0)); + auto *Literal = dyn_cast(StrippedArg); + // We only expect 1 level of wrappers + if (!Literal) { + emitError(WrapperFunc, WrapperCI, BadWrapperErrorMsg); + return 0; + } + CallsToMutate.emplace_back(WrapperCI, getCASLiteral(Literal)); + } + // We're certain that the wrapper won't have any uses, since we've just + // marked all its calls for replacement with __spirv_ocl_printf. + FunctionsToDrop.emplace_back(WrapperFunc); + // Similar certainty for the generic AS version of __spirv_ocl_printf + // itself - we've determined it only gets called inside the + // soon-to-be-removed wrapper. + assert(F->hasOneUse() && "Unexpected __spirv_ocl_printf call outside of " + "SYCL wrapper function"); + FunctionsToDrop.emplace_back(F); + } else { + emitError( + F, CI, + "Make sure each format string literal is " + "known at compile time or use OpenCL constant address space literals " + "for device-side printf calls"); + return 0; + } + } + for (auto &CallConstantPair : CallsToMutate) { + setCallArgOntoCASPrintf(CallConstantPair.first, CallConstantPair.second, + CASPrintfFunc); + ++MutatedCallsCount; + } + if (F->hasNUses(0)) + FunctionsToDrop.emplace_back(F); + return MutatedCallsCount; +} +} // namespace diff --git a/llvm/test/SYCLLowerIR/printf_addrspace/Inputs/experimental-printf.cpp b/llvm/test/SYCLLowerIR/printf_addrspace/Inputs/experimental-printf.cpp new file mode 100644 index 0000000000000..3f462fd7722c8 --- /dev/null +++ b/llvm/test/SYCLLowerIR/printf_addrspace/Inputs/experimental-printf.cpp @@ -0,0 +1,17 @@ +#include + +using namespace sycl; + +int main() { + queue q; + q.submit([&](handler &cgh) { + cgh.single_task([=]() { + ext::oneapi::experimental::printf("String No. %f\n", 1.0f); + const char *IntFormatString = "String No. %i\n"; + ext::oneapi::experimental::printf(IntFormatString, 2); + ext::oneapi::experimental::printf(IntFormatString, 3); + }); + }); + + return 0; +} diff --git a/llvm/test/SYCLLowerIR/printf_addrspace/generic_as.ll b/llvm/test/SYCLLowerIR/printf_addrspace/generic_as.ll new file mode 100644 index 0000000000000..7c252474b148c --- /dev/null +++ b/llvm/test/SYCLLowerIR/printf_addrspace/generic_as.ll @@ -0,0 +1,63 @@ +;; This tests replacement of string literal address space for __spirv_ocl_printf +;; at the regular O2 optimization level. + +;; Compiled with the following command (custom build of SYCL Clang with +;; SYCLMutatePrintfAddrspacePass turned off): +;; clang++ -fsycl -fsycl-device-only Inputs/experimental-printf.cpp -S -D__SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ -O2 + +; RUN: opt < %s --SYCLMutatePrintfAddrspace -S | FileCheck %s +; RUN: opt < %s --SYCLMutatePrintfAddrspace -S --enable-new-pm=1 | FileCheck %s + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +%"class.cl::sycl::range" = type { %"class.cl::sycl::detail::array" } +%"class.cl::sycl::detail::array" = type { [1 x i64] } +%"class.cl::sycl::id" = type { %"class.cl::sycl::detail::array" } + +$_ZTSZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_EUlvE_ = comdat any + +; CHECK-DAG: @.str._AS2 = internal addrspace(2) constant [15 x i8] c"String No. %f\0A\00", align 1 +@.str = private unnamed_addr addrspace(1) constant [15 x i8] c"String No. %f\0A\00", align 1 +; CHECK-DAG: @.str.1._AS2 = internal addrspace(2) constant [15 x i8] c"String No. %i\0A\00", align 1 +@.str.1 = private unnamed_addr addrspace(1) constant [15 x i8] c"String No. %i\0A\00", align 1 + +; Function Attrs: convergent mustprogress norecurse +define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_EUlvE_() local_unnamed_addr #2 comdat !kernel_arg_buffer_location !6 { +entry: + ; In particular, make sure that no argument promotion has been done for float + ; upon variadic redeclaration: + ; CHECK: tail call spir_func i32 (i8 addrspace(2)*, ...) @_Z18__spirv_ocl_printfPU3AS2Kcz(i8 addrspace(2)* getelementptr inbounds ([15 x i8], [15 x i8] addrspace(2)* @.str._AS2, i32 0, i32 0), float 1.000000e+00) + %call.i.i = tail call spir_func i32 @_Z18__spirv_ocl_printfIJfEEiPKcDpT_(i8 addrspace(4)* getelementptr inbounds ([15 x i8], [15 x i8] addrspace(4)* addrspacecast ([15 x i8] addrspace(1)* @.str to [15 x i8] addrspace(4)*), i64 0, i64 0), float 1.000000e+00) #3 + ; CHECK-NEXT: tail call spir_func i32 (i8 addrspace(2)*, ...) @_Z18__spirv_ocl_printfPU3AS2Kcz(i8 addrspace(2)* getelementptr inbounds ([15 x i8], [15 x i8] addrspace(2)* @.str.1._AS2, i32 0, i32 0), i32 2) + %call.i1.i = tail call spir_func i32 @_Z18__spirv_ocl_printfIJiEEiPKcDpT_(i8 addrspace(4)* getelementptr inbounds ([15 x i8], [15 x i8] addrspace(4)* addrspacecast ([15 x i8] addrspace(1)* @.str.1 to [15 x i8] addrspace(4)*), i64 0, i64 0), i32 2) #3 + ; CHECK-NEXT: tail call spir_func i32 (i8 addrspace(2)*, ...) @_Z18__spirv_ocl_printfPU3AS2Kcz(i8 addrspace(2)* getelementptr inbounds ([15 x i8], [15 x i8] addrspace(2)* @.str.1._AS2, i32 0, i32 0), i32 3) + %call.i2.i = tail call spir_func i32 @_Z18__spirv_ocl_printfIJiEEiPKcDpT_(i8 addrspace(4)* getelementptr inbounds ([15 x i8], [15 x i8] addrspace(4)* addrspacecast ([15 x i8] addrspace(1)* @.str.1 to [15 x i8] addrspace(4)*), i64 0, i64 0), i32 3) #3 + ret void +} + +; CHECK: declare dso_local spir_func i32 @_Z18__spirv_ocl_printfPU3AS2Kcz(i8 addrspace(2)*, ...) + +; Function Attrs: convergent +declare dso_local spir_func i32 @_Z18__spirv_ocl_printfIJfEEiPKcDpT_(i8 addrspace(4)*, float) local_unnamed_addr #1 + +; Function Attrs: convergent +declare dso_local spir_func i32 @_Z18__spirv_ocl_printfIJiEEiPKcDpT_(i8 addrspace(4)*, i32) local_unnamed_addr #1 + +attributes #0 = { convergent norecurse "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="experimental-printf.cpp" "uniform-work-group-size"="true" } +attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #2 = { convergent mustprogress norecurse "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="experimental-printf.cpp" "uniform-work-group-size"="true" } +attributes #3 = { convergent } + +!llvm.module.flags = !{!0, !1} +!opencl.spir.version = !{!2} +!spirv.Source = !{!3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{i32 1, i32 2} +!3 = !{i32 4, i32 100000} +!4 = !{!"clang version 14.0.0"} +!5 = !{i32 -1, i32 -1, i32 -1, i32 -1} +!6 = !{} diff --git a/llvm/test/SYCLLowerIR/printf_addrspace/generic_as_negative_checks.ll b/llvm/test/SYCLLowerIR/printf_addrspace/generic_as_negative_checks.ll new file mode 100644 index 0000000000000..caf019f4ce528 --- /dev/null +++ b/llvm/test/SYCLLowerIR/printf_addrspace/generic_as_negative_checks.ll @@ -0,0 +1,18 @@ +; generic_as.ll +; RUN: opt < %S/generic_as.ll --SYCLMutatePrintfAddrspace -S | FileCheck %s --check-prefix=CHECK-BUILTIN +; RUN: opt < %S/generic_as.ll --SYCLMutatePrintfAddrspace -S --enable-new-pm=1 | FileCheck %s --check-prefix=CHECK-BUILTIN + +; generic_as_no_opt.ll +; RUN: opt < %S/generic_as_no_opt.ll --SYCLMutatePrintfAddrspace -S | FileCheck %s --check-prefixes=CHECK-WRAPPER,CHECK-BUILTIN +; RUN: opt < %S/generic_as_no_opt.ll --SYCLMutatePrintfAddrspace -S --enable-new-pm=1 | FileCheck %s --check-prefixes=CHECK-WRAPPER,CHECK-BUILTIN + +; generic_as_variadic.ll +; RUN: opt < %S/generic_as_variadic.ll --SYCLMutatePrintfAddrspace -S | FileCheck %s --check-prefix=CHECK-BUILTIN +; RUN: opt < %S/generic_as_variadic.ll --SYCLMutatePrintfAddrspace -S --enable-new-pm=1 | FileCheck %s --check-prefix=CHECK-BUILTIN + +; Check that the wrapper bodies have been deleted after call replacement +; CHECK-WRAPPER-NOT: spir_func i32 @{{.*}}sycl{{.*}}printf + +; Make sure the generic AS declarations have been wiped out +; in favor of the single constant AS & variadic declaration: +; CHECK-BUILTIN-NOT: declare dso_local spir_func i32 @_Z18__spirv_ocl_printf{{.*}}(i8 addrspace(4)*, {{.+}}) diff --git a/llvm/test/SYCLLowerIR/printf_addrspace/generic_as_no_opt.ll b/llvm/test/SYCLLowerIR/printf_addrspace/generic_as_no_opt.ll new file mode 100644 index 0000000000000..0785430aa453d --- /dev/null +++ b/llvm/test/SYCLLowerIR/printf_addrspace/generic_as_no_opt.ll @@ -0,0 +1,390 @@ +;; This tests replacement of string literal address space for __spirv_ocl_printf +;; when no optimizations (inlining, constant propagation) have been performed prior +;; to the pass scheduling. + +;; Compiled with the following command (custom build of SYCL Clang with +;; SYCLMutatePrintfAddrspacePass turned off): +;; clang++ -fsycl -fsycl-device-only Inputs/experimental-printf.cpp -S -O0 -D__SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ + +; RUN: opt < %s --SYCLMutatePrintfAddrspace -S | FileCheck %s +; RUN: opt < %s --SYCLMutatePrintfAddrspace -S --enable-new-pm=1 | FileCheck %s + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +%"class.cl::sycl::range" = type { %"class.cl::sycl::detail::array" } +%"class.cl::sycl::detail::array" = type { [1 x i64] } +%"class.cl::sycl::id" = type { %"class.cl::sycl::detail::array" } +%class.anon = type { %"class.cl::sycl::accessor" } +%"class.cl::sycl::accessor" = type { %"class.cl::sycl::detail::AccessorImplDevice" } +%"class.cl::sycl::detail::AccessorImplDevice" = type { %"class.cl::sycl::id", %"class.cl::sycl::range", %"class.cl::sycl::range" } +%"class.cl::sycl::detail::accessor_common" = type { i8 } +%class.anon.0 = type { i8 } + +$_ZN2cl4sycl8accessorINS0_6detail14AssertHappenedELi1ELNS0_6access4modeE1025ELNS4_6targetE2014ELNS4_11placeholderE0ENS0_3ext6oneapi22accessor_property_listIJEEEEC2Ev = comdat any + +$_ZN2cl4sycl2idILi1EEC2Ev = comdat any + +$_ZN2cl4sycl6detail14InitializedValILi1ENS0_5rangeEE3getILi0EEENS3_ILi1EEEv = comdat any + +$_ZN2cl4sycl6detail18AccessorImplDeviceILi1EEC2ENS0_2idILi1EEENS0_5rangeILi1EEES7_ = comdat any + +$_ZN2cl4sycl6detail5arrayILi1EEC2ILi1EEENSt9enable_ifIXeqT_Li1EEmE4typeE = comdat any + +$_ZN2cl4sycl5rangeILi1EEC2ILi1EEENSt9enable_ifIXeqT_Li1EEmE4typeE = comdat any + +$_ZN2cl4sycl6detail5arrayILi1EEixEi = comdat any + +$_ZN2cl4sycl8accessorINS0_6detail14AssertHappenedELi1ELNS0_6access4modeE1025ELNS4_6targetE2014ELNS4_11placeholderE0ENS0_3ext6oneapi22accessor_property_listIJEEEE9getOffsetEv = comdat any + +$_ZN2cl4sycl8accessorINS0_6detail14AssertHappenedELi1ELNS0_6access4modeE1025ELNS4_6targetE2014ELNS4_11placeholderE0ENS0_3ext6oneapi22accessor_property_listIJEEEE14getAccessRangeEv = comdat any + +$_ZN2cl4sycl8accessorINS0_6detail14AssertHappenedELi1ELNS0_6access4modeE1025ELNS4_6targetE2014ELNS4_11placeholderE0ENS0_3ext6oneapi22accessor_property_listIJEEEE14getMemoryRangeEv = comdat any + +$_ZNK2cl4sycl8accessorINS0_6detail14AssertHappenedELi1ELNS0_6access4modeE1025ELNS4_6targetE2014ELNS4_11placeholderE0ENS0_3ext6oneapi22accessor_property_listIJEEEEixILi1EvEERS3_NS0_2idILi1EEE = comdat any + +$_ZN2cl4sycl2idILi1EEC2ILi1EEENSt9enable_ifIXeqT_Li1EEmE4typeE = comdat any + +$_ZNK2cl4sycl8accessorINS0_6detail14AssertHappenedELi1ELNS0_6access4modeE1025ELNS4_6targetE2014ELNS4_11placeholderE0ENS0_3ext6oneapi22accessor_property_listIJEEEE14getLinearIndexILi1EEEmNS0_2idIXT_EEE = comdat any + +$_ZNK2cl4sycl8accessorINS0_6detail14AssertHappenedELi1ELNS0_6access4modeE1025ELNS4_6targetE2014ELNS4_11placeholderE0ENS0_3ext6oneapi22accessor_property_listIJEEEE15getQualifiedPtrEv = comdat any + +$_ZTSZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_EUlvE_ = comdat any + +$_ZN2cl4sycl3ext6oneapi12experimental6printfIcJfEEEiPKT_DpT0_ = comdat any + +$_ZN2cl4sycl3ext6oneapi12experimental6printfIcJiEEEiPKT_DpT0_ = comdat any + +; CHECK-DAG: @.str._AS2 = internal addrspace(2) constant [15 x i8] c"String No. %f\0A\00", align 1 +@.str = private unnamed_addr addrspace(1) constant [15 x i8] c"String No. %f\0A\00", align 1 +; CHECK-DAG: @.str.1._AS2 = internal addrspace(2) constant [15 x i8] c"String No. %i\0A\00", align 1 +@.str.1 = private unnamed_addr addrspace(1) constant [15 x i8] c"String No. %i\0A\00", align 1 + +; Function Attrs: convergent noinline norecurse optnone +define linkonce_odr dso_local spir_func void @_ZN2cl4sycl8accessorINS0_6detail14AssertHappenedELi1ELNS0_6access4modeE1025ELNS4_6targetE2014ELNS4_11placeholderE0ENS0_3ext6oneapi22accessor_property_listIJEEEEC2Ev(%"class.cl::sycl::accessor" addrspace(4)* align 8 dereferenceable_or_null(32) %this) unnamed_addr #1 comdat align 2 { +entry: + %this.addr = alloca %"class.cl::sycl::accessor" addrspace(4)*, align 8 + %agg.tmp = alloca %"class.cl::sycl::id", align 8 + %agg.tmp2 = alloca %"class.cl::sycl::range", align 8 + %agg.tmp3 = alloca %"class.cl::sycl::range", align 8 + %this.addr.ascast = addrspacecast %"class.cl::sycl::accessor" addrspace(4)** %this.addr to %"class.cl::sycl::accessor" addrspace(4)* addrspace(4)* + %agg.tmp.ascast = addrspacecast %"class.cl::sycl::id"* %agg.tmp to %"class.cl::sycl::id" addrspace(4)* + %agg.tmp2.ascast = addrspacecast %"class.cl::sycl::range"* %agg.tmp2 to %"class.cl::sycl::range" addrspace(4)* + %agg.tmp3.ascast = addrspacecast %"class.cl::sycl::range"* %agg.tmp3 to %"class.cl::sycl::range" addrspace(4)* + store %"class.cl::sycl::accessor" addrspace(4)* %this, %"class.cl::sycl::accessor" addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + %this1 = load %"class.cl::sycl::accessor" addrspace(4)*, %"class.cl::sycl::accessor" addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + %0 = bitcast %"class.cl::sycl::accessor" addrspace(4)* %this1 to %"class.cl::sycl::detail::accessor_common" addrspace(4)* + %impl = getelementptr inbounds %"class.cl::sycl::accessor", %"class.cl::sycl::accessor" addrspace(4)* %this1, i32 0, i32 0 + %1 = bitcast %"class.cl::sycl::id" addrspace(4)* %agg.tmp.ascast to i8 addrspace(4)* + call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 8 %1, i8 0, i64 8, i1 false) + call spir_func void @_ZN2cl4sycl2idILi1EEC2Ev(%"class.cl::sycl::id" addrspace(4)* align 8 dereferenceable_or_null(8) %agg.tmp.ascast) #8 + call spir_func void @_ZN2cl4sycl6detail14InitializedValILi1ENS0_5rangeEE3getILi0EEENS3_ILi1EEEv(%"class.cl::sycl::range" addrspace(4)* sret(%"class.cl::sycl::range") align 8 %agg.tmp2.ascast) #8 + call spir_func void @_ZN2cl4sycl6detail14InitializedValILi1ENS0_5rangeEE3getILi0EEENS3_ILi1EEEv(%"class.cl::sycl::range" addrspace(4)* sret(%"class.cl::sycl::range") align 8 %agg.tmp3.ascast) #8 + %agg.tmp.ascast.ascast = addrspacecast %"class.cl::sycl::id" addrspace(4)* %agg.tmp.ascast to %"class.cl::sycl::id"* + %agg.tmp2.ascast.ascast = addrspacecast %"class.cl::sycl::range" addrspace(4)* %agg.tmp2.ascast to %"class.cl::sycl::range"* + %agg.tmp3.ascast.ascast = addrspacecast %"class.cl::sycl::range" addrspace(4)* %agg.tmp3.ascast to %"class.cl::sycl::range"* + call spir_func void @_ZN2cl4sycl6detail18AccessorImplDeviceILi1EEC2ENS0_2idILi1EEENS0_5rangeILi1EEES7_(%"class.cl::sycl::detail::AccessorImplDevice" addrspace(4)* align 8 dereferenceable_or_null(24) %impl, %"class.cl::sycl::id"* byval(%"class.cl::sycl::id") align 8 %agg.tmp.ascast.ascast, %"class.cl::sycl::range"* byval(%"class.cl::sycl::range") align 8 %agg.tmp2.ascast.ascast, %"class.cl::sycl::range"* byval(%"class.cl::sycl::range") align 8 %agg.tmp3.ascast.ascast) #8 + ret void +} + +; Function Attrs: argmemonly nofree nounwind willreturn +declare void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* noalias nocapture writeonly, i8 addrspace(4)* noalias nocapture readonly, i64, i1 immarg) #3 + +; Function Attrs: convergent mustprogress noinline norecurse optnone +define internal spir_func void @_ZZZN2cl4sycl6detailL19submitAssertCaptureERNS0_5queueERNS0_5eventEPS2_RKNS1_13code_locationEENKUlRNS0_7handlerEE_clESB_ENKUlvE_clEv(%class.anon addrspace(4)* align 8 dereferenceable_or_null(32) %this) #2 align 2 { +entry: + %this.addr = alloca %class.anon addrspace(4)*, align 8 + %agg.tmp = alloca %"class.cl::sycl::id", align 8 + %this.addr.ascast = addrspacecast %class.anon addrspace(4)** %this.addr to %class.anon addrspace(4)* addrspace(4)* + %agg.tmp.ascast = addrspacecast %"class.cl::sycl::id"* %agg.tmp to %"class.cl::sycl::id" addrspace(4)* + store %class.anon addrspace(4)* %this, %class.anon addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + %this1 = load %class.anon addrspace(4)*, %class.anon addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + %0 = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %this1, i32 0, i32 0 + call spir_func void @_ZN2cl4sycl2idILi1EEC2ILi1EEENSt9enable_ifIXeqT_Li1EEmE4typeE(%"class.cl::sycl::id" addrspace(4)* align 8 dereferenceable_or_null(8) %agg.tmp.ascast, i64 0) #8 + %agg.tmp.ascast.ascast = addrspacecast %"class.cl::sycl::id" addrspace(4)* %agg.tmp.ascast to %"class.cl::sycl::id"* + ret void +} + +; Function Attrs: argmemonly nofree nounwind willreturn writeonly +declare void @llvm.memset.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8, i64, i1 immarg) #4 + +; Function Attrs: convergent noinline norecurse optnone +define linkonce_odr dso_local spir_func void @_ZN2cl4sycl2idILi1EEC2Ev(%"class.cl::sycl::id" addrspace(4)* align 8 dereferenceable_or_null(8) %this) unnamed_addr #1 comdat align 2 { +entry: + %this.addr = alloca %"class.cl::sycl::id" addrspace(4)*, align 8 + %this.addr.ascast = addrspacecast %"class.cl::sycl::id" addrspace(4)** %this.addr to %"class.cl::sycl::id" addrspace(4)* addrspace(4)* + store %"class.cl::sycl::id" addrspace(4)* %this, %"class.cl::sycl::id" addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + %this1 = load %"class.cl::sycl::id" addrspace(4)*, %"class.cl::sycl::id" addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + %0 = bitcast %"class.cl::sycl::id" addrspace(4)* %this1 to %"class.cl::sycl::detail::array" addrspace(4)* + call spir_func void @_ZN2cl4sycl6detail5arrayILi1EEC2ILi1EEENSt9enable_ifIXeqT_Li1EEmE4typeE(%"class.cl::sycl::detail::array" addrspace(4)* align 8 dereferenceable_or_null(8) %0, i64 0) #8 + ret void +} + +; Function Attrs: convergent mustprogress noinline norecurse optnone +define linkonce_odr dso_local spir_func void @_ZN2cl4sycl6detail14InitializedValILi1ENS0_5rangeEE3getILi0EEENS3_ILi1EEEv(%"class.cl::sycl::range" addrspace(4)* noalias sret(%"class.cl::sycl::range") align 8 %agg.result) #2 comdat align 2 { +entry: + call spir_func void @_ZN2cl4sycl5rangeILi1EEC2ILi1EEENSt9enable_ifIXeqT_Li1EEmE4typeE(%"class.cl::sycl::range" addrspace(4)* align 8 dereferenceable_or_null(8) %agg.result, i64 0) #8 + ret void +} + +; Function Attrs: convergent noinline norecurse nounwind optnone +define linkonce_odr dso_local spir_func void @_ZN2cl4sycl6detail18AccessorImplDeviceILi1EEC2ENS0_2idILi1EEENS0_5rangeILi1EEES7_(%"class.cl::sycl::detail::AccessorImplDevice" addrspace(4)* align 8 dereferenceable_or_null(24) %this, %"class.cl::sycl::id"* byval(%"class.cl::sycl::id") align 8 %Offset, %"class.cl::sycl::range"* byval(%"class.cl::sycl::range") align 8 %AccessRange, %"class.cl::sycl::range"* byval(%"class.cl::sycl::range") align 8 %MemoryRange) unnamed_addr #5 comdat align 2 { +entry: + %this.addr = alloca %"class.cl::sycl::detail::AccessorImplDevice" addrspace(4)*, align 8 + %this.addr.ascast = addrspacecast %"class.cl::sycl::detail::AccessorImplDevice" addrspace(4)** %this.addr to %"class.cl::sycl::detail::AccessorImplDevice" addrspace(4)* addrspace(4)* + store %"class.cl::sycl::detail::AccessorImplDevice" addrspace(4)* %this, %"class.cl::sycl::detail::AccessorImplDevice" addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + %Offset.ascast = addrspacecast %"class.cl::sycl::id"* %Offset to %"class.cl::sycl::id" addrspace(4)* + %AccessRange.ascast = addrspacecast %"class.cl::sycl::range"* %AccessRange to %"class.cl::sycl::range" addrspace(4)* + %MemoryRange.ascast = addrspacecast %"class.cl::sycl::range"* %MemoryRange to %"class.cl::sycl::range" addrspace(4)* + %this1 = load %"class.cl::sycl::detail::AccessorImplDevice" addrspace(4)*, %"class.cl::sycl::detail::AccessorImplDevice" addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + %Offset2 = getelementptr inbounds %"class.cl::sycl::detail::AccessorImplDevice", %"class.cl::sycl::detail::AccessorImplDevice" addrspace(4)* %this1, i32 0, i32 0 + %0 = bitcast %"class.cl::sycl::id" addrspace(4)* %Offset2 to i8 addrspace(4)* + %1 = bitcast %"class.cl::sycl::id" addrspace(4)* %Offset.ascast to i8 addrspace(4)* + call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 8 %0, i8 addrspace(4)* align 8 %1, i64 8, i1 false) + %AccessRange3 = getelementptr inbounds %"class.cl::sycl::detail::AccessorImplDevice", %"class.cl::sycl::detail::AccessorImplDevice" addrspace(4)* %this1, i32 0, i32 1 + %2 = bitcast %"class.cl::sycl::range" addrspace(4)* %AccessRange3 to i8 addrspace(4)* + %3 = bitcast %"class.cl::sycl::range" addrspace(4)* %AccessRange.ascast to i8 addrspace(4)* + call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 8 %2, i8 addrspace(4)* align 8 %3, i64 8, i1 false) + %MemRange = getelementptr inbounds %"class.cl::sycl::detail::AccessorImplDevice", %"class.cl::sycl::detail::AccessorImplDevice" addrspace(4)* %this1, i32 0, i32 2 + %4 = bitcast %"class.cl::sycl::range" addrspace(4)* %MemRange to i8 addrspace(4)* + %5 = bitcast %"class.cl::sycl::range" addrspace(4)* %MemoryRange.ascast to i8 addrspace(4)* + call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 8 %4, i8 addrspace(4)* align 8 %5, i64 8, i1 false) + ret void +} + +; Function Attrs: convergent noinline norecurse nounwind optnone +define linkonce_odr dso_local spir_func void @_ZN2cl4sycl6detail5arrayILi1EEC2ILi1EEENSt9enable_ifIXeqT_Li1EEmE4typeE(%"class.cl::sycl::detail::array" addrspace(4)* align 8 dereferenceable_or_null(8) %this, i64 %dim0) unnamed_addr #5 comdat align 2 { +entry: + %this.addr = alloca %"class.cl::sycl::detail::array" addrspace(4)*, align 8 + %dim0.addr = alloca i64, align 8 + %this.addr.ascast = addrspacecast %"class.cl::sycl::detail::array" addrspace(4)** %this.addr to %"class.cl::sycl::detail::array" addrspace(4)* addrspace(4)* + %dim0.addr.ascast = addrspacecast i64* %dim0.addr to i64 addrspace(4)* + store %"class.cl::sycl::detail::array" addrspace(4)* %this, %"class.cl::sycl::detail::array" addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + store i64 %dim0, i64 addrspace(4)* %dim0.addr.ascast, align 8 + %this1 = load %"class.cl::sycl::detail::array" addrspace(4)*, %"class.cl::sycl::detail::array" addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + %common_array = getelementptr inbounds %"class.cl::sycl::detail::array", %"class.cl::sycl::detail::array" addrspace(4)* %this1, i32 0, i32 0 + %arrayinit.begin = getelementptr inbounds [1 x i64], [1 x i64] addrspace(4)* %common_array, i64 0, i64 0 + %0 = load i64, i64 addrspace(4)* %dim0.addr.ascast, align 8 + store i64 %0, i64 addrspace(4)* %arrayinit.begin, align 8 + ret void +} + +; Function Attrs: convergent noinline norecurse nounwind optnone +define linkonce_odr dso_local spir_func void @_ZN2cl4sycl5rangeILi1EEC2ILi1EEENSt9enable_ifIXeqT_Li1EEmE4typeE(%"class.cl::sycl::range" addrspace(4)* align 8 dereferenceable_or_null(8) %this, i64 %dim0) unnamed_addr #5 comdat align 2 { +entry: + %this.addr = alloca %"class.cl::sycl::range" addrspace(4)*, align 8 + %dim0.addr = alloca i64, align 8 + %this.addr.ascast = addrspacecast %"class.cl::sycl::range" addrspace(4)** %this.addr to %"class.cl::sycl::range" addrspace(4)* addrspace(4)* + %dim0.addr.ascast = addrspacecast i64* %dim0.addr to i64 addrspace(4)* + store %"class.cl::sycl::range" addrspace(4)* %this, %"class.cl::sycl::range" addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + store i64 %dim0, i64 addrspace(4)* %dim0.addr.ascast, align 8 + %this1 = load %"class.cl::sycl::range" addrspace(4)*, %"class.cl::sycl::range" addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + %0 = bitcast %"class.cl::sycl::range" addrspace(4)* %this1 to %"class.cl::sycl::detail::array" addrspace(4)* + %1 = load i64, i64 addrspace(4)* %dim0.addr.ascast, align 8 + call spir_func void @_ZN2cl4sycl6detail5arrayILi1EEC2ILi1EEENSt9enable_ifIXeqT_Li1EEmE4typeE(%"class.cl::sycl::detail::array" addrspace(4)* align 8 dereferenceable_or_null(8) %0, i64 %1) #8 + ret void +} + +; Function Attrs: convergent mustprogress noinline norecurse optnone +define linkonce_odr dso_local spir_func align 8 dereferenceable(8) i64 addrspace(4)* @_ZN2cl4sycl6detail5arrayILi1EEixEi(%"class.cl::sycl::detail::array" addrspace(4)* align 8 dereferenceable_or_null(8) %this, i32 %dimension) #2 comdat align 2 { +entry: + %this.addr.i = alloca %"class.cl::sycl::detail::array" addrspace(4)*, align 8 + %dimension.addr.i = alloca i32, align 4 + %retval = alloca i64 addrspace(4)*, align 8 + %this.addr = alloca %"class.cl::sycl::detail::array" addrspace(4)*, align 8 + %dimension.addr = alloca i32, align 4 + %retval.ascast = addrspacecast i64 addrspace(4)** %retval to i64 addrspace(4)* addrspace(4)* + %this.addr.ascast = addrspacecast %"class.cl::sycl::detail::array" addrspace(4)** %this.addr to %"class.cl::sycl::detail::array" addrspace(4)* addrspace(4)* + %dimension.addr.ascast = addrspacecast i32* %dimension.addr to i32 addrspace(4)* + store %"class.cl::sycl::detail::array" addrspace(4)* %this, %"class.cl::sycl::detail::array" addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + store i32 %dimension, i32 addrspace(4)* %dimension.addr.ascast, align 4 + %this1 = load %"class.cl::sycl::detail::array" addrspace(4)*, %"class.cl::sycl::detail::array" addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + %0 = load i32, i32 addrspace(4)* %dimension.addr.ascast, align 4 + %this.addr.ascast.i = addrspacecast %"class.cl::sycl::detail::array" addrspace(4)** %this.addr.i to %"class.cl::sycl::detail::array" addrspace(4)* addrspace(4)* + %dimension.addr.ascast.i = addrspacecast i32* %dimension.addr.i to i32 addrspace(4)* + store %"class.cl::sycl::detail::array" addrspace(4)* %this1, %"class.cl::sycl::detail::array" addrspace(4)* addrspace(4)* %this.addr.ascast.i, align 8 + store i32 %0, i32 addrspace(4)* %dimension.addr.ascast.i, align 4 + %this1.i = load %"class.cl::sycl::detail::array" addrspace(4)*, %"class.cl::sycl::detail::array" addrspace(4)* addrspace(4)* %this.addr.ascast.i, align 8 + %common_array = getelementptr inbounds %"class.cl::sycl::detail::array", %"class.cl::sycl::detail::array" addrspace(4)* %this1, i32 0, i32 0 + %1 = load i32, i32 addrspace(4)* %dimension.addr.ascast, align 4 + %idxprom = sext i32 %1 to i64 + %arrayidx = getelementptr inbounds [1 x i64], [1 x i64] addrspace(4)* %common_array, i64 0, i64 %idxprom + ret i64 addrspace(4)* %arrayidx +} + +; Function Attrs: convergent mustprogress noinline norecurse nounwind optnone +define linkonce_odr dso_local spir_func align 8 dereferenceable(8) %"class.cl::sycl::id" addrspace(4)* @_ZN2cl4sycl8accessorINS0_6detail14AssertHappenedELi1ELNS0_6access4modeE1025ELNS4_6targetE2014ELNS4_11placeholderE0ENS0_3ext6oneapi22accessor_property_listIJEEEE9getOffsetEv(%"class.cl::sycl::accessor" addrspace(4)* align 8 dereferenceable_or_null(32) %this) #6 comdat align 2 { +entry: + %retval = alloca %"class.cl::sycl::id" addrspace(4)*, align 8 + %this.addr = alloca %"class.cl::sycl::accessor" addrspace(4)*, align 8 + %retval.ascast = addrspacecast %"class.cl::sycl::id" addrspace(4)** %retval to %"class.cl::sycl::id" addrspace(4)* addrspace(4)* + %this.addr.ascast = addrspacecast %"class.cl::sycl::accessor" addrspace(4)** %this.addr to %"class.cl::sycl::accessor" addrspace(4)* addrspace(4)* + store %"class.cl::sycl::accessor" addrspace(4)* %this, %"class.cl::sycl::accessor" addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + %this1 = load %"class.cl::sycl::accessor" addrspace(4)*, %"class.cl::sycl::accessor" addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + %impl = getelementptr inbounds %"class.cl::sycl::accessor", %"class.cl::sycl::accessor" addrspace(4)* %this1, i32 0, i32 0 + %Offset = getelementptr inbounds %"class.cl::sycl::detail::AccessorImplDevice", %"class.cl::sycl::detail::AccessorImplDevice" addrspace(4)* %impl, i32 0, i32 0 + ret %"class.cl::sycl::id" addrspace(4)* %Offset +} + +; Function Attrs: convergent mustprogress noinline norecurse nounwind optnone +define linkonce_odr dso_local spir_func align 8 dereferenceable(8) %"class.cl::sycl::range" addrspace(4)* @_ZN2cl4sycl8accessorINS0_6detail14AssertHappenedELi1ELNS0_6access4modeE1025ELNS4_6targetE2014ELNS4_11placeholderE0ENS0_3ext6oneapi22accessor_property_listIJEEEE14getAccessRangeEv(%"class.cl::sycl::accessor" addrspace(4)* align 8 dereferenceable_or_null(32) %this) #6 comdat align 2 { +entry: + %retval = alloca %"class.cl::sycl::range" addrspace(4)*, align 8 + %this.addr = alloca %"class.cl::sycl::accessor" addrspace(4)*, align 8 + %retval.ascast = addrspacecast %"class.cl::sycl::range" addrspace(4)** %retval to %"class.cl::sycl::range" addrspace(4)* addrspace(4)* + %this.addr.ascast = addrspacecast %"class.cl::sycl::accessor" addrspace(4)** %this.addr to %"class.cl::sycl::accessor" addrspace(4)* addrspace(4)* + store %"class.cl::sycl::accessor" addrspace(4)* %this, %"class.cl::sycl::accessor" addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + %this1 = load %"class.cl::sycl::accessor" addrspace(4)*, %"class.cl::sycl::accessor" addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + %impl = getelementptr inbounds %"class.cl::sycl::accessor", %"class.cl::sycl::accessor" addrspace(4)* %this1, i32 0, i32 0 + %AccessRange = getelementptr inbounds %"class.cl::sycl::detail::AccessorImplDevice", %"class.cl::sycl::detail::AccessorImplDevice" addrspace(4)* %impl, i32 0, i32 1 + ret %"class.cl::sycl::range" addrspace(4)* %AccessRange +} + +; Function Attrs: convergent mustprogress noinline norecurse nounwind optnone +define linkonce_odr dso_local spir_func align 8 dereferenceable(8) %"class.cl::sycl::range" addrspace(4)* @_ZN2cl4sycl8accessorINS0_6detail14AssertHappenedELi1ELNS0_6access4modeE1025ELNS4_6targetE2014ELNS4_11placeholderE0ENS0_3ext6oneapi22accessor_property_listIJEEEE14getMemoryRangeEv(%"class.cl::sycl::accessor" addrspace(4)* align 8 dereferenceable_or_null(32) %this) #6 comdat align 2 { +entry: + %retval = alloca %"class.cl::sycl::range" addrspace(4)*, align 8 + %this.addr = alloca %"class.cl::sycl::accessor" addrspace(4)*, align 8 + %retval.ascast = addrspacecast %"class.cl::sycl::range" addrspace(4)** %retval to %"class.cl::sycl::range" addrspace(4)* addrspace(4)* + %this.addr.ascast = addrspacecast %"class.cl::sycl::accessor" addrspace(4)** %this.addr to %"class.cl::sycl::accessor" addrspace(4)* addrspace(4)* + store %"class.cl::sycl::accessor" addrspace(4)* %this, %"class.cl::sycl::accessor" addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + %this1 = load %"class.cl::sycl::accessor" addrspace(4)*, %"class.cl::sycl::accessor" addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + %impl = getelementptr inbounds %"class.cl::sycl::accessor", %"class.cl::sycl::accessor" addrspace(4)* %this1, i32 0, i32 0 + %MemRange = getelementptr inbounds %"class.cl::sycl::detail::AccessorImplDevice", %"class.cl::sycl::detail::AccessorImplDevice" addrspace(4)* %impl, i32 0, i32 2 + ret %"class.cl::sycl::range" addrspace(4)* %MemRange +} + +; Function Attrs: convergent noinline norecurse nounwind optnone +define linkonce_odr dso_local spir_func void @_ZN2cl4sycl2idILi1EEC2ILi1EEENSt9enable_ifIXeqT_Li1EEmE4typeE(%"class.cl::sycl::id" addrspace(4)* align 8 dereferenceable_or_null(8) %this, i64 %dim0) unnamed_addr #5 comdat align 2 { +entry: + %this.addr = alloca %"class.cl::sycl::id" addrspace(4)*, align 8 + %dim0.addr = alloca i64, align 8 + %this.addr.ascast = addrspacecast %"class.cl::sycl::id" addrspace(4)** %this.addr to %"class.cl::sycl::id" addrspace(4)* addrspace(4)* + %dim0.addr.ascast = addrspacecast i64* %dim0.addr to i64 addrspace(4)* + store %"class.cl::sycl::id" addrspace(4)* %this, %"class.cl::sycl::id" addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + store i64 %dim0, i64 addrspace(4)* %dim0.addr.ascast, align 8 + %this1 = load %"class.cl::sycl::id" addrspace(4)*, %"class.cl::sycl::id" addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + %0 = bitcast %"class.cl::sycl::id" addrspace(4)* %this1 to %"class.cl::sycl::detail::array" addrspace(4)* + %1 = load i64, i64 addrspace(4)* %dim0.addr.ascast, align 8 + call spir_func void @_ZN2cl4sycl6detail5arrayILi1EEC2ILi1EEENSt9enable_ifIXeqT_Li1EEmE4typeE(%"class.cl::sycl::detail::array" addrspace(4)* align 8 dereferenceable_or_null(8) %0, i64 %1) #8 + ret void +} + +; Function Attrs: convergent mustprogress noinline norecurse optnone +define linkonce_odr dso_local spir_func i64 @_ZNK2cl4sycl8accessorINS0_6detail14AssertHappenedELi1ELNS0_6access4modeE1025ELNS4_6targetE2014ELNS4_11placeholderE0ENS0_3ext6oneapi22accessor_property_listIJEEEE14getLinearIndexILi1EEEmNS0_2idIXT_EEE(%"class.cl::sycl::accessor" addrspace(4)* align 8 dereferenceable_or_null(32) %this, %"class.cl::sycl::id"* byval(%"class.cl::sycl::id") align 8 %Id) #2 comdat align 2 { +entry: + %retval = alloca i64, align 8 + %this.addr = alloca %"class.cl::sycl::accessor" addrspace(4)*, align 8 + %Result = alloca i64, align 8 + %retval.ascast = addrspacecast i64* %retval to i64 addrspace(4)* + %this.addr.ascast = addrspacecast %"class.cl::sycl::accessor" addrspace(4)** %this.addr to %"class.cl::sycl::accessor" addrspace(4)* addrspace(4)* + %Result.ascast = addrspacecast i64* %Result to i64 addrspace(4)* + store %"class.cl::sycl::accessor" addrspace(4)* %this, %"class.cl::sycl::accessor" addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + %Id.ascast = addrspacecast %"class.cl::sycl::id"* %Id to %"class.cl::sycl::id" addrspace(4)* + %this1 = load %"class.cl::sycl::accessor" addrspace(4)*, %"class.cl::sycl::accessor" addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + %0 = bitcast %"class.cl::sycl::id" addrspace(4)* %Id.ascast to %"class.cl::sycl::detail::array" addrspace(4)* + %call = call spir_func align 8 dereferenceable(8) i64 addrspace(4)* @_ZN2cl4sycl6detail5arrayILi1EEixEi(%"class.cl::sycl::detail::array" addrspace(4)* align 8 dereferenceable_or_null(8) %0, i32 0) #8 + %1 = load i64, i64 addrspace(4)* %call, align 8 + ret i64 %1 +} + +; Function Attrs: convergent mustprogress noinline norecurse optnone +define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_EUlvE_() #0 comdat !kernel_arg_buffer_location !9 { +entry: + %0 = alloca %class.anon.0, align 1 + %1 = addrspacecast %class.anon.0* %0 to %class.anon.0 addrspace(4)* + call spir_func void @_ZZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_ENKUlvE_clEv(%class.anon.0 addrspace(4)* align 1 dereferenceable_or_null(1) %1) #8 + ret void +} + +; CHECK-LABEL: define internal spir_func void @_ZZZ4main{{.*}} +; Function Attrs: convergent mustprogress noinline norecurse optnone +define internal spir_func void @_ZZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_ENKUlvE_clEv(%class.anon.0 addrspace(4)* align 1 dereferenceable_or_null(1) %this) #2 align 2 { +entry: + %this.addr = alloca %class.anon.0 addrspace(4)*, align 8 + %IntFormatString = alloca i8 addrspace(4)*, align 8 + %this.addr.ascast = addrspacecast %class.anon.0 addrspace(4)** %this.addr to %class.anon.0 addrspace(4)* addrspace(4)* + %IntFormatString.ascast = addrspacecast i8 addrspace(4)** %IntFormatString to i8 addrspace(4)* addrspace(4)* + store %class.anon.0 addrspace(4)* %this, %class.anon.0 addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + %this1 = load %class.anon.0 addrspace(4)*, %class.anon.0 addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 + ; In particular, make sure that no argument promotion has been done for float + ; upon variadic redeclaration: + ; CHECK: call spir_func i32 (i8 addrspace(2)*, ...) @_Z18__spirv_ocl_printfPU3AS2Kcz(i8 addrspace(2)* getelementptr inbounds ([15 x i8], [15 x i8] addrspace(2)* @.str._AS2, i32 0, i32 0), float 1.000000e+00) + %call = call spir_func i32 @_ZN2cl4sycl3ext6oneapi12experimental6printfIcJfEEEiPKT_DpT0_(i8 addrspace(4)* getelementptr inbounds ([15 x i8], [15 x i8] addrspace(4)* addrspacecast ([15 x i8] addrspace(1)* @.str to [15 x i8] addrspace(4)*), i64 0, i64 0), float 1.000000e+00) #8 + store i8 addrspace(4)* getelementptr inbounds ([15 x i8], [15 x i8] addrspace(4)* addrspacecast ([15 x i8] addrspace(1)* @.str.1 to [15 x i8] addrspace(4)*), i64 0, i64 0), i8 addrspace(4)* addrspace(4)* %IntFormatString.ascast, align 8 + %0 = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %IntFormatString.ascast, align 8 + ; CHECK: call spir_func i32 (i8 addrspace(2)*, ...) @_Z18__spirv_ocl_printfPU3AS2Kcz(i8 addrspace(2)* getelementptr inbounds ([15 x i8], [15 x i8] addrspace(2)* @.str.1._AS2, i32 0, i32 0), i32 2) + %call2 = call spir_func i32 @_ZN2cl4sycl3ext6oneapi12experimental6printfIcJiEEEiPKT_DpT0_(i8 addrspace(4)* %0, i32 2) #8 + %1 = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %IntFormatString.ascast, align 8 + ; CHECK: call spir_func i32 (i8 addrspace(2)*, ...) @_Z18__spirv_ocl_printfPU3AS2Kcz(i8 addrspace(2)* getelementptr inbounds ([15 x i8], [15 x i8] addrspace(2)* @.str.1._AS2, i32 0, i32 0), i32 3) + %call3 = call spir_func i32 @_ZN2cl4sycl3ext6oneapi12experimental6printfIcJiEEEiPKT_DpT0_(i8 addrspace(4)* %1, i32 3) #8 + ret void +} + +; CHECK-LABEL: declare dso_local spir_func i32 @_Z18__spirv_ocl_printfPU3AS2Kcz(i8 addrspace(2)*, ...) + +; Function Attrs: convergent mustprogress noinline norecurse optnone +define linkonce_odr dso_local spir_func i32 @_ZN2cl4sycl3ext6oneapi12experimental6printfIcJfEEEiPKT_DpT0_(i8 addrspace(4)* %__format, float %args) #2 comdat { +entry: + %retval = alloca i32, align 4 + %__format.addr = alloca i8 addrspace(4)*, align 8 + %args.addr = alloca float, align 4 + %retval.ascast = addrspacecast i32* %retval to i32 addrspace(4)* + %__format.addr.ascast = addrspacecast i8 addrspace(4)** %__format.addr to i8 addrspace(4)* addrspace(4)* + %args.addr.ascast = addrspacecast float* %args.addr to float addrspace(4)* + store i8 addrspace(4)* %__format, i8 addrspace(4)* addrspace(4)* %__format.addr.ascast, align 8 + store float %args, float addrspace(4)* %args.addr.ascast, align 4 + %0 = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %__format.addr.ascast, align 8 + %1 = load float, float addrspace(4)* %args.addr.ascast, align 4 + %call = call spir_func i32 @_Z18__spirv_ocl_printfIJfEEiPKcDpT_(i8 addrspace(4)* %0, float %1) #8 + ret i32 %call +} + +; Function Attrs: convergent mustprogress noinline norecurse optnone +define linkonce_odr dso_local spir_func i32 @_ZN2cl4sycl3ext6oneapi12experimental6printfIcJiEEEiPKT_DpT0_(i8 addrspace(4)* %__format, i32 %args) #2 comdat { +entry: + %retval = alloca i32, align 4 + %__format.addr = alloca i8 addrspace(4)*, align 8 + %args.addr = alloca i32, align 4 + %retval.ascast = addrspacecast i32* %retval to i32 addrspace(4)* + %__format.addr.ascast = addrspacecast i8 addrspace(4)** %__format.addr to i8 addrspace(4)* addrspace(4)* + %args.addr.ascast = addrspacecast i32* %args.addr to i32 addrspace(4)* + store i8 addrspace(4)* %__format, i8 addrspace(4)* addrspace(4)* %__format.addr.ascast, align 8 + store i32 %args, i32 addrspace(4)* %args.addr.ascast, align 4 + %0 = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %__format.addr.ascast, align 8 + %1 = load i32, i32 addrspace(4)* %args.addr.ascast, align 4 + %call = call spir_func i32 @_Z18__spirv_ocl_printfIJiEEiPKcDpT_(i8 addrspace(4)* %0, i32 %1) #8 + ret i32 %call +} + +; Function Attrs: convergent +declare dso_local spir_func i32 @_Z18__spirv_ocl_printfIJfEEiPKcDpT_(i8 addrspace(4)*, float) #7 + +; Function Attrs: convergent +declare dso_local spir_func i32 @_Z18__spirv_ocl_printfIJiEEiPKcDpT_(i8 addrspace(4)*, i32) #7 + +attributes #0 = { convergent mustprogress noinline norecurse optnone "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="experimental-printf.cpp" "uniform-work-group-size"="true" } +attributes #1 = { convergent noinline norecurse optnone "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #2 = { convergent mustprogress noinline norecurse optnone "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #3 = { argmemonly nofree nounwind willreturn } +attributes #4 = { argmemonly nofree nounwind willreturn writeonly } +attributes #5 = { convergent noinline norecurse nounwind optnone "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #6 = { convergent mustprogress noinline norecurse nounwind optnone "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #7 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #8 = { convergent } + +!llvm.module.flags = !{!0, !1} +!opencl.spir.version = !{!2} +!spirv.Source = !{!3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{i32 1, i32 2} +!3 = !{i32 4, i32 100000} +!4 = !{!"clang version 14.0.0"} +!5 = !{i32 -1, i32 -1, i32 -1, i32 -1} +!6 = distinct !{!6, !7, !8} +!7 = !{!"llvm.loop.mustprogress"} +!8 = !{!"llvm.loop.unroll.enable"} +!9 = !{} diff --git a/llvm/test/SYCLLowerIR/printf_addrspace/generic_as_variadic.ll b/llvm/test/SYCLLowerIR/printf_addrspace/generic_as_variadic.ll new file mode 100644 index 0000000000000..8726af3c0ac47 --- /dev/null +++ b/llvm/test/SYCLLowerIR/printf_addrspace/generic_as_variadic.ll @@ -0,0 +1,60 @@ +;; This tests replacement of string literal address space for the variadic version of +;; __spirv_ocl_printf at the regular O2 optimization level. +;; Note: this test's checks are almost identical to those for non-variadic version +;; of pre-transformation printf functions. However, we can't exclude argument promotion +;; here since it has been enforced by FE. + +;; Compiled with the following command (custom build of SYCL Clang with +;; SYCLMutatePrintfAddrspacePass turned off): +;; clang++ -fsycl -fsycl-device-only Inputs/experimental-printf.cpp -S -O2 + +; RUN: opt < %s --SYCLMutatePrintfAddrspace -S | FileCheck %s +; RUN: opt < %s --SYCLMutatePrintfAddrspace -S --enable-new-pm=1 | FileCheck %s + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +%"class.cl::sycl::range" = type { %"class.cl::sycl::detail::array" } +%"class.cl::sycl::detail::array" = type { [1 x i64] } +%"class.cl::sycl::id" = type { %"class.cl::sycl::detail::array" } + +$_ZTSZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_EUlvE_ = comdat any + +; CHECK-DAG: @.str._AS2 = internal addrspace(2) constant [15 x i8] c"String No. %f\0A\00", align 1 +@.str = private unnamed_addr addrspace(1) constant [15 x i8] c"String No. %f\0A\00", align 1 +; CHECK-DAG: @.str.1._AS2 = internal addrspace(2) constant [15 x i8] c"String No. %i\0A\00", align 1 +@.str.1 = private unnamed_addr addrspace(1) constant [15 x i8] c"String No. %i\0A\00", align 1 + +; Function Attrs: convergent mustprogress norecurse +define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_EUlvE_() local_unnamed_addr #2 comdat !kernel_arg_buffer_location !6 { +entry: + ; CHECK: tail call spir_func i32 (i8 addrspace(2)*, ...) @_Z18__spirv_ocl_printfPU3AS2Kcz(i8 addrspace(2)* getelementptr inbounds ([15 x i8], [15 x i8] addrspace(2)* @.str._AS2, i32 0, i32 0), double 1.000000e+00) + %call.i.i = tail call spir_func i32 (i8 addrspace(4)*, ...) @_Z18__spirv_ocl_printfPKcz(i8 addrspace(4)* getelementptr inbounds ([15 x i8], [15 x i8] addrspace(4)* addrspacecast ([15 x i8] addrspace(1)* @.str to [15 x i8] addrspace(4)*), i64 0, i64 0), double 1.000000e+00) #3 + ; CHECK-NEXT: tail call spir_func i32 (i8 addrspace(2)*, ...) @_Z18__spirv_ocl_printfPU3AS2Kcz(i8 addrspace(2)* getelementptr inbounds ([15 x i8], [15 x i8] addrspace(2)* @.str.1._AS2, i32 0, i32 0), i32 2) + %call.i1.i = tail call spir_func i32 (i8 addrspace(4)*, ...) @_Z18__spirv_ocl_printfPKcz(i8 addrspace(4)* getelementptr inbounds ([15 x i8], [15 x i8] addrspace(4)* addrspacecast ([15 x i8] addrspace(1)* @.str.1 to [15 x i8] addrspace(4)*), i64 0, i64 0), i32 2) #3 + ; CHECK-NEXT: tail call spir_func i32 (i8 addrspace(2)*, ...) @_Z18__spirv_ocl_printfPU3AS2Kcz(i8 addrspace(2)* getelementptr inbounds ([15 x i8], [15 x i8] addrspace(2)* @.str.1._AS2, i32 0, i32 0), i32 3) + %call.i2.i = tail call spir_func i32 (i8 addrspace(4)*, ...) @_Z18__spirv_ocl_printfPKcz(i8 addrspace(4)* getelementptr inbounds ([15 x i8], [15 x i8] addrspace(4)* addrspacecast ([15 x i8] addrspace(1)* @.str.1 to [15 x i8] addrspace(4)*), i64 0, i64 0), i32 3) #3 + ret void +} + +; CHECK: declare dso_local spir_func i32 @_Z18__spirv_ocl_printfPU3AS2Kcz(i8 addrspace(2)*, ...) +; Function Attrs: convergent +declare dso_local spir_func i32 @_Z18__spirv_ocl_printfPKcz(i8 addrspace(4)*, ...) local_unnamed_addr #1 + +attributes #0 = { convergent norecurse "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="../../../tests/experimental-printf.cpp" "uniform-work-group-size"="true" } +attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #2 = { convergent mustprogress norecurse "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="../../../tests/experimental-printf.cpp" "uniform-work-group-size"="true" } +attributes #3 = { convergent } + +!llvm.module.flags = !{!0, !1} +!opencl.spir.version = !{!2} +!spirv.Source = !{!3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{i32 1, i32 2} +!3 = !{i32 4, i32 100000} +!4 = !{!"clang version 14.0.0"} +!5 = !{i32 -1, i32 -1, i32 -1, i32 -1} +!6 = !{} diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index e0fe6de4de12a..84fdf78e0c385 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -581,6 +581,7 @@ int main(int argc, char **argv) { initializeESIMDLowerVecArgLegacyPassPass(Registry); initializeESIMDVerifierPass(Registry); initializeSYCLLowerWGLocalMemoryLegacyPass(Registry); + initializeSYCLMutatePrintfAddrspaceLegacyPassPass(Registry); #ifdef BUILD_EXAMPLES initializeExampleIRTransforms(Registry); diff --git a/sycl/include/CL/__spirv/spirv_ops.hpp b/sycl/include/CL/__spirv/spirv_ops.hpp index 55b0764aefe1f..fc548e79c0c02 100644 --- a/sycl/include/CL/__spirv/spirv_ops.hpp +++ b/sycl/include/CL/__spirv/spirv_ops.hpp @@ -725,9 +725,12 @@ template extern SYCL_EXTERNAL int __spirv_ocl_printf(const __attribute__((opencl_constant)) char *Format, Args... args); +template +extern SYCL_EXTERNAL int __spirv_ocl_printf(const char *Format, Args... args); #else extern SYCL_EXTERNAL int __spirv_ocl_printf(const __attribute__((opencl_constant)) char *Format, ...); +extern SYCL_EXTERNAL int __spirv_ocl_printf(const char *Format, ...); #endif #else // if !__SYCL_DEVICE_ONLY__ diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp index 8e710f4dd0979..e32e1c70a5a97 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -40,9 +40,13 @@ namespace experimental { // executing program. // // - According to the OpenCL spec, the format string must reside in constant -// address space. This requires to perform "tricky" declarations of them, see -// test/built-ins/printf.cpp for examples -// FIXME: this potentially can be done on SYCL FE side automatically +// address space. The constant address space declarations might get "tricky", +// see test/built-ins/printf.cpp for examples. +// In simple cases (compile-time known string contents, direct declaration of +// the format literal inside the printf call, etc.), the compiler should handle +// the automatic address space conversion. +// FIXME: Once the extension to generic address space is fully supported, the +// constant AS version may need to be deprecated. // // - The format string is interpreted according to the OpenCL C spec, where all // data types has fixed size, opposed to C++ types which doesn't guarantee @@ -59,8 +63,8 @@ namespace experimental { // guarded using __SYCL_DEVICE_ONLY__ preprocessor macro or avoided in favor // of more portable solutions if needed // -template -int printf(const __SYCL_CONSTANT_AS char *__format, Args... args) { +template +int printf(const FormatT *__format, Args... args) { #if defined(__SYCL_DEVICE_ONLY__) && defined(__SPIR__) return __spirv_ocl_printf(__format, args...); #else