diff --git a/libdevice/fallback-cmath-fp64.cpp b/libdevice/fallback-cmath-fp64.cpp index c3fdad6ce756f..a17b8b1eb16c0 100644 --- a/libdevice/fallback-cmath-fp64.cpp +++ b/libdevice/fallback-cmath-fp64.cpp @@ -10,6 +10,9 @@ #ifdef __SPIR__ +// To support fallback device libraries on-demand loading, please update the +// DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add +// or remove any item in this file. DEVICE_EXTERN_C double __devicelib_log(double x) { return __spirv_ocl_log(x); } diff --git a/libdevice/fallback-cmath.cpp b/libdevice/fallback-cmath.cpp index 50a98d01c363c..8ab3c7a0239ba 100644 --- a/libdevice/fallback-cmath.cpp +++ b/libdevice/fallback-cmath.cpp @@ -10,6 +10,11 @@ #ifdef __SPIR__ +// To support fallback device libraries on-demand loading, please update the +// DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add +// or remove any item in this file. +// TODO: generate the DeviceLibFuncMap in sycl-post-link.cpp automatically +// during the build based on libdevice to avoid manual syncing. DEVICE_EXTERN_C float __devicelib_scalbnf(float x, int n) { return __spirv_ocl_ldexp(x, n); } diff --git a/libdevice/fallback-complex-fp64.cpp b/libdevice/fallback-complex-fp64.cpp index cc9d80f27180a..4fae34a627312 100644 --- a/libdevice/fallback-complex-fp64.cpp +++ b/libdevice/fallback-complex-fp64.cpp @@ -12,6 +12,9 @@ #ifdef __SPIR__ #include +// To support fallback device libraries on-demand loading, please update the +// DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add +// or remove any item in this file.
DEVICE_EXTERN_C double __devicelib_creal(double __complex__ z) { return __real__(z); } diff --git a/libdevice/fallback-complex.cpp b/libdevice/fallback-complex.cpp index 91094eab30f27..6ae1bf667abb2 100644 --- a/libdevice/fallback-complex.cpp +++ b/libdevice/fallback-complex.cpp @@ -12,6 +12,9 @@ #ifdef __SPIR__ #include +// To support fallback device libraries on-demand loading, please update the +// DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add +// or remove any item in this file. DEVICE_EXTERN_C float __devicelib_crealf(float __complex__ z) { return __real__(z); } diff --git a/llvm/include/llvm/Support/PropertySetIO.h b/llvm/include/llvm/Support/PropertySetIO.h index a8b329f02a62d..b2842b7a842c1 100644 --- a/llvm/include/llvm/Support/PropertySetIO.h +++ b/llvm/include/llvm/Support/PropertySetIO.h @@ -123,6 +123,7 @@ class PropertySetRegistry { // Specific property category names used by tools. static constexpr char SYCL_SPECIALIZATION_CONSTANTS[] = "SYCL/specialization constants"; + static constexpr char SYCL_DEVICELIB_REQ_MASK[] = "SYCL/devicelib req mask"; // Function for bulk addition of an entire property set under given category // (property set name). 
@@ -160,4 +161,4 @@ class PropertySetRegistry { } // namespace util } // namespace llvm -#endif // #define LLVM_SUPPORT_PROPERTYSETIO_H \ No newline at end of file +#endif // #define LLVM_SUPPORT_PROPERTYSETIO_H diff --git a/llvm/lib/Support/PropertySetIO.cpp b/llvm/lib/Support/PropertySetIO.cpp index a70983faeda59..bcf48f97a7f55 100644 --- a/llvm/lib/Support/PropertySetIO.cpp +++ b/llvm/lib/Support/PropertySetIO.cpp @@ -112,5 +112,6 @@ template <> PropertyValue::Type PropertyValue::getTypeTag() { } constexpr char PropertySetRegistry::SYCL_SPECIALIZATION_CONSTANTS[]; +constexpr char PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK[]; } // namespace util } // namespace llvm diff --git a/llvm/lib/Support/SimpleTable.cpp b/llvm/lib/Support/SimpleTable.cpp index 0f65686a61f23..f364d0cdc1d1e 100644 --- a/llvm/lib/Support/SimpleTable.cpp +++ b/llvm/lib/Support/SimpleTable.cpp @@ -64,7 +64,7 @@ int SimpleTable::getColumnId(StringRef ColName) const { Error SimpleTable::addColumnName(StringRef ColName) { if (ColumnName2Num.find(ColName) != ColumnName2Num.end()) - return makeError("column already exists" + ColName); + return makeError("column already exists " + ColName); ColumnNames.emplace_back(ColName.str()); ColumnName2Num[ColumnNames.back()] = static_cast(ColumnNames.size()) - 1; ColumnNum2Name.push_back(std::prev(ColumnNames.end())); diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 66c842ef285e7..9bbbbb7541799 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -15,6 +15,7 @@ #include "SpecConstants.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Triple.h" #include "llvm/Bitcode/BitcodeWriterPass.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/InstIterator.h" @@ -47,6 +48,18 @@ cl::OptionCategory PostLinkCat{"sycl-post-link options"}; static constexpr char COL_CODE[] = "Code"; static constexpr char COL_SYM[] = "Symbols"; static 
constexpr char COL_PROPS[] = "Properties"; +static constexpr char DEVICELIB_FUNC_PREFIX[] = "__devicelib_"; + +// DeviceLibExt is shared between sycl-post-link tool and sycl runtime. +// If any change is made here, need to sync with DeviceLibExt definition +// in sycl/source/detail/program_manager/program_manager.hpp +enum class DeviceLibExt : std::uint32_t { + cl_intel_devicelib_assert, + cl_intel_devicelib_math, + cl_intel_devicelib_math_fp64, + cl_intel_devicelib_complex, + cl_intel_devicelib_complex_fp64 +}; // InputFilename - The filename to read from. static cl::opt InputFilename{ @@ -104,6 +117,143 @@ static cl::opt SpecConstLower{ "set spec constants to C++ defaults")), cl::cat(PostLinkCat)}; +struct ImagePropSaveInfo { + bool NeedDeviceLibReqMask; + bool DoSpecConst; + bool SetSpecConstAtRT; + bool SpecConstsMet; +}; +// Please update DeviceLibFuncMap if any item is added to or removed from +// fallback device libraries in libdevice. +static std::unordered_map DeviceLibFuncMap = { + {"__devicelib_acosf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_acoshf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_asinf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_asinhf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_atan2f", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_atanf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_atanhf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_cbrtf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_cosf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_coshf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_erfcf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_erff", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_exp2f", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_expf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_expm1f", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_fdimf", 
DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_fmaf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_fmodf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_frexpf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_hypotf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_ilogbf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_ldexpf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_lgammaf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_log10f", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_log1pf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_log2f", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_logbf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_logf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_modff", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_nextafterf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_powf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_remainderf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_remquof", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_sinf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_sinhf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_sqrtf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_tanf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_tanhf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_tgammaf", DeviceLibExt::cl_intel_devicelib_math}, + {"__devicelib_acos", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_acosh", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_asin", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_asinh", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_atan", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_atan2", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_atanh", 
DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_cbrt", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_cos", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_cosh", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_erf", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_erfc", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_exp", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_exp2", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_expm1", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_fdim", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_fma", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_fmod", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_frexp", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_hypot", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_ilogb", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_ldexp", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_lgamma", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_log", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_log10", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_log1p", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_log2", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_logb", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_modf", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_nextafter", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_pow", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_remainder", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_remquo", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_sin", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_sinh", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_sqrt", 
DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_tan", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_tanh", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib_tgamma", DeviceLibExt::cl_intel_devicelib_math_fp64}, + {"__devicelib___divsc3", DeviceLibExt::cl_intel_devicelib_complex}, + {"__devicelib___mulsc3", DeviceLibExt::cl_intel_devicelib_complex}, + {"__devicelib_cabsf", DeviceLibExt::cl_intel_devicelib_complex}, + {"__devicelib_cacosf", DeviceLibExt::cl_intel_devicelib_complex}, + {"__devicelib_cacoshf", DeviceLibExt::cl_intel_devicelib_complex}, + {"__devicelib_cargf", DeviceLibExt::cl_intel_devicelib_complex}, + {"__devicelib_casinf", DeviceLibExt::cl_intel_devicelib_complex}, + {"__devicelib_casinhf", DeviceLibExt::cl_intel_devicelib_complex}, + {"__devicelib_catanf", DeviceLibExt::cl_intel_devicelib_complex}, + {"__devicelib_catanhf", DeviceLibExt::cl_intel_devicelib_complex}, + {"__devicelib_ccosf", DeviceLibExt::cl_intel_devicelib_complex}, + {"__devicelib_ccoshf", DeviceLibExt::cl_intel_devicelib_complex}, + {"__devicelib_cexpf", DeviceLibExt::cl_intel_devicelib_complex}, + {"__devicelib_cimagf", DeviceLibExt::cl_intel_devicelib_complex}, + {"__devicelib_clogf", DeviceLibExt::cl_intel_devicelib_complex}, + {"__devicelib_cpolarf", DeviceLibExt::cl_intel_devicelib_complex}, + {"__devicelib_cpowf", DeviceLibExt::cl_intel_devicelib_complex}, + {"__devicelib_cprojf", DeviceLibExt::cl_intel_devicelib_complex}, + {"__devicelib_crealf", DeviceLibExt::cl_intel_devicelib_complex}, + {"__devicelib_csinf", DeviceLibExt::cl_intel_devicelib_complex}, + {"__devicelib_csinhf", DeviceLibExt::cl_intel_devicelib_complex}, + {"__devicelib_csqrtf", DeviceLibExt::cl_intel_devicelib_complex}, + {"__devicelib_ctanf", DeviceLibExt::cl_intel_devicelib_complex}, + {"__devicelib_ctanhf", DeviceLibExt::cl_intel_devicelib_complex}, + {"__devicelib___divdc3", DeviceLibExt::cl_intel_devicelib_complex_fp64}, + {"__devicelib___muldc3", 
DeviceLibExt::cl_intel_devicelib_complex_fp64}, + {"__devicelib_cabs", DeviceLibExt::cl_intel_devicelib_complex_fp64}, + {"__devicelib_cacos", DeviceLibExt::cl_intel_devicelib_complex_fp64}, + {"__devicelib_cacosh", DeviceLibExt::cl_intel_devicelib_complex_fp64}, + {"__devicelib_carg", DeviceLibExt::cl_intel_devicelib_complex_fp64}, + {"__devicelib_casin", DeviceLibExt::cl_intel_devicelib_complex_fp64}, + {"__devicelib_casinh", DeviceLibExt::cl_intel_devicelib_complex_fp64}, + {"__devicelib_catan", DeviceLibExt::cl_intel_devicelib_complex_fp64}, + {"__devicelib_catanh", DeviceLibExt::cl_intel_devicelib_complex_fp64}, + {"__devicelib_ccos", DeviceLibExt::cl_intel_devicelib_complex_fp64}, + {"__devicelib_ccosh", DeviceLibExt::cl_intel_devicelib_complex_fp64}, + {"__devicelib_cexp", DeviceLibExt::cl_intel_devicelib_complex_fp64}, + {"__devicelib_cimag", DeviceLibExt::cl_intel_devicelib_complex_fp64}, + {"__devicelib_clog", DeviceLibExt::cl_intel_devicelib_complex_fp64}, + {"__devicelib_cpolar", DeviceLibExt::cl_intel_devicelib_complex_fp64}, + {"__devicelib_cpow", DeviceLibExt::cl_intel_devicelib_complex_fp64}, + {"__devicelib_cproj", DeviceLibExt::cl_intel_devicelib_complex_fp64}, + {"__devicelib_creal", DeviceLibExt::cl_intel_devicelib_complex_fp64}, + {"__devicelib_csin", DeviceLibExt::cl_intel_devicelib_complex_fp64}, + {"__devicelib_csinh", DeviceLibExt::cl_intel_devicelib_complex_fp64}, + {"__devicelib_csqrt", DeviceLibExt::cl_intel_devicelib_complex_fp64}, + {"__devicelib_ctan", DeviceLibExt::cl_intel_devicelib_complex_fp64}, + {"__devicelib_ctanh", DeviceLibExt::cl_intel_devicelib_complex_fp64}, +}; + static void error(const Twine &Msg) { errs() << "sycl-post-link: " << Msg << '\n'; exit(1); @@ -295,20 +445,76 @@ saveResultModules(std::vector> &ResModules) { return Res; } -static string_vector -saveSpecConstantIDMaps(const std::vector &Maps) { - string_vector Res; +// Each fallback device library corresponds to one bit in "require mask" which +// is an 
unsigned int32. getDeviceLibBits checks which fallback device library +// is required for FuncName and returns the corresponding bit. The corresponding +// mask for each fallback device library is: +// fallback-cassert: 0x1 +// fallback-cmath: 0x2 +// fallback-cmath-fp64: 0x4 +// fallback-complex: 0x8 +// fallback-complex-fp64: 0x10 +static uint32_t getDeviceLibBits(const std::string &FuncName) { + auto DeviceLibFuncIter = DeviceLibFuncMap.find(FuncName); + return ((DeviceLibFuncIter == DeviceLibFuncMap.end()) + ? 0 + : 0x1 << (static_cast(DeviceLibFuncIter->second) - + static_cast( + DeviceLibExt::cl_intel_devicelib_assert))); +} - for (size_t I = 0; I < Maps.size(); ++I) { - std::string SCFile = makeResultFileName(".prop", I); +// For each device image module, we go through all functions which meet +// 1. The function name has prefix "__devicelib_" +// 2. The function is a declaration, which means it has no function body +// And we don't expect non-spirv functions with "__devicelib_" prefix. +static uint32_t getModuleReqMask(const Module &M) { + // Device libraries will be enabled only for spir-v module. + if (!llvm::Triple(M.getTargetTriple()).isSPIR()) + return 0; + // 0x1 means sycl runtime will link and load libsycl-fallback-cassert.spv by + // default. In fact, default link assert spv is not necessary but dramatic + // perf regression is observed if we don't link any device library. The perf + // regression is caused by a clang issue.
+ uint32_t ReqMask = 0x1; + for (const Function &SF : M) { + if (SF.getName().startswith(DEVICELIB_FUNC_PREFIX) && SF.isDeclaration()) { + assert(SF.getCallingConv() == CallingConv::SPIR_FUNC); + uint32_t DeviceLibBits = getDeviceLibBits(SF.getName().str()); + ReqMask |= DeviceLibBits; + } + } + return ReqMask; +} + +static string_vector saveDeviceImageProperty( + const std::vector> &ResultModules, + const ImagePropSaveInfo &ImgPSInfo) { + string_vector Res; + for (size_t I = 0; I < ResultModules.size(); ++I) { llvm::util::PropertySetRegistry PropSet; - PropSet.add(llvm::util::PropertySetRegistry::SYCL_SPECIALIZATION_CONSTANTS, - Maps[I]); + if (ImgPSInfo.NeedDeviceLibReqMask) { + uint32_t MRMask = getModuleReqMask(*ResultModules[I]); + std::map RMEntry = {{"DeviceLibReqMask", MRMask}}; + PropSet.add(llvm::util::PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK, + RMEntry); + } + if (ImgPSInfo.DoSpecConst && ImgPSInfo.SetSpecConstAtRT) { + // extract spec constant maps per each module + SpecIDMapTy TmpSpecIDMap; + if (ImgPSInfo.SpecConstsMet) + SpecConstantsPass::collectSpecConstantMetadata(*ResultModules[I].get(), + TmpSpecIDMap); + PropSet.add( + llvm::util::PropertySetRegistry::SYCL_SPECIALIZATION_CONSTANTS, + TmpSpecIDMap); + } std::error_code EC; + std::string SCFile = makeResultFileName(".prop", I); raw_fd_ostream SCOut(SCFile, EC); PropSet.write(SCOut); Res.emplace_back(std::move(SCFile)); } + return Res; } @@ -412,7 +618,6 @@ int main(int argc, char **argv) { } std::vector> ResultModules; - std::vector ResultSpecIDMaps; string_vector ResultSymbolsLists; util::SimpleTable Table; @@ -456,15 +661,11 @@ int main(int argc, char **argv) { Error Err = Table.addColumn(COL_CODE, Files); CHECK_AND_EXIT(Err); } - if (DoSpecConst && SetSpecConstAtRT) { - // extract spec constant maps per each module - for (auto &MUptr : ResultModules) { - ResultSpecIDMaps.emplace_back(SpecIDMapTy()); - if (SpecConstsMet) - SpecConstantsPass::collectSpecConstantMetadata(*MUptr.get(), - 
ResultSpecIDMaps.back()); - } - string_vector Files = saveSpecConstantIDMaps(ResultSpecIDMaps); + + { + ImagePropSaveInfo ImgPSInfo = {true, DoSpecConst, SetSpecConstAtRT, + SpecConstsMet}; + string_vector Files = saveDeviceImageProperty(ResultModules, ImgPSInfo); Error Err = Table.addColumn(COL_PROPS, Files); CHECK_AND_EXIT(Err); } diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index 6718261c485b8..5ddb2825f4946 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -649,6 +649,8 @@ static const uint8_t PI_DEVICE_BINARY_OFFLOAD_KIND_SYCL = 4; /// PropertySetRegistry::SYCL_SPECIALIZATION_CONSTANTS defined in /// PropertySetIO.h #define PI_PROPERTY_SET_SPEC_CONST_MAP "SYCL/specialization constants" +/// PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK defined in PropertySetIO.h +#define PI_PROPERTY_SET_DEVICELIB_REQ_MASK "SYCL/devicelib req mask" /// This struct is a record of the device binary information. If the Kind field /// denotes a portable binary type (SPIR-V or LLVM IR), the DeviceTargetSpec diff --git a/sycl/include/CL/sycl/detail/pi.hpp b/sycl/include/CL/sycl/detail/pi.hpp index 5ff6e6312df36..63f7440ef8756 100644 --- a/sycl/include/CL/sycl/detail/pi.hpp +++ b/sycl/include/CL/sycl/detail/pi.hpp @@ -250,6 +250,7 @@ class DeviceBinaryImage { ConstIterator begin() const { return ConstIterator(Begin); } ConstIterator end() const { return ConstIterator(End); } friend class DeviceBinaryImage; + bool isAvailable() const { return !(Begin == nullptr); } private: PropertyRange() : Begin(nullptr), End(nullptr) {} @@ -298,6 +299,7 @@ class DeviceBinaryImage { /// name of the property is the specializaion constant symbolic ID and the /// value is 32-bit unsigned integer ID. 
const PropertyRange &getSpecConstants() const { return SpecConstIDMap; } + const PropertyRange &getDeviceLibReqMask() const { return DeviceLibReqMask; } virtual ~DeviceBinaryImage() {} protected: @@ -307,6 +309,7 @@ class DeviceBinaryImage { pi_device_binary Bin; pi::PiDeviceBinaryType Format = PI_DEVICE_BINARY_TYPE_NONE; DeviceBinaryImage::PropertyRange SpecConstIDMap; + DeviceBinaryImage::PropertyRange DeviceLibReqMask; }; /// Tries to determine the device binary image foramat. Returns diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp index b00844cf85a59..8e249e1406f23 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/source/detail/pi.cpp @@ -543,6 +543,7 @@ void DeviceBinaryImage::init(pi_device_binary Bin) { Format = getBinaryImageFormat(Bin->BinaryStart, getSize()); SpecConstIDMap.init(Bin, PI_PROPERTY_SET_SPEC_CONST_MAP); + DeviceLibReqMask.init(Bin, PI_PROPERTY_SET_DEVICELIB_REQ_MASK); } } // namespace pi diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 1d788a9bbc1fb..92d47716b7587 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -373,7 +373,11 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M, // Link a fallback implementation of device libraries if they are not // supported by a device compiler. // Pre-compiled programs are supposed to be already linked. - const bool LinkDeviceLibs = Img.getFormat() == PI_DEVICE_BINARY_TYPE_SPIRV; + // If device image is not SPIRV, DeviceLibReqMask will be 0 which means + // no fallback device library will be linked. 
+ uint32_t DeviceLibReqMask = 0; + if (Img.getFormat() == PI_DEVICE_BINARY_TYPE_SPIRV) + DeviceLibReqMask = getDeviceLibReqMask(Img); const std::vector &Devices = ContextImpl->getDevices(); std::vector PiDevices(Devices.size()); @@ -385,7 +389,7 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M, ProgramPtr BuiltProgram = build(std::move(ProgramManaged), ContextImpl, Img.getCompileOptions(), Img.getLinkOptions(), PiDevices, - ContextImpl->getCachedLibPrograms(), LinkDeviceLibs); + ContextImpl->getCachedLibPrograms(), DeviceLibReqMask); return BuiltProgram.release(); }; @@ -506,15 +510,15 @@ static bool loadDeviceLib(const ContextImplPtr Context, const char *Name, static const char *getDeviceLibFilename(DeviceLibExt Extension) { switch (Extension) { - case cl_intel_devicelib_assert: + case DeviceLibExt::cl_intel_devicelib_assert: return "libsycl-fallback-cassert.spv"; - case cl_intel_devicelib_math: + case DeviceLibExt::cl_intel_devicelib_math: return "libsycl-fallback-cmath.spv"; - case cl_intel_devicelib_math_fp64: + case DeviceLibExt::cl_intel_devicelib_math_fp64: return "libsycl-fallback-cmath-fp64.spv"; - case cl_intel_devicelib_complex: + case DeviceLibExt::cl_intel_devicelib_complex: return "libsycl-fallback-complex.spv"; - case cl_intel_devicelib_complex_fp64: + case DeviceLibExt::cl_intel_devicelib_complex_fp64: return "libsycl-fallback-complex-fp64.spv"; } throw compile_program_error("Unhandled (new?) 
device library extension", @@ -523,15 +527,15 @@ static const char *getDeviceLibFilename(DeviceLibExt Extension) { static const char *getDeviceLibExtensionStr(DeviceLibExt Extension) { switch (Extension) { - case cl_intel_devicelib_assert: + case DeviceLibExt::cl_intel_devicelib_assert: return "cl_intel_devicelib_assert"; - case cl_intel_devicelib_math: + case DeviceLibExt::cl_intel_devicelib_math: return "cl_intel_devicelib_math"; - case cl_intel_devicelib_math_fp64: + case DeviceLibExt::cl_intel_devicelib_math_fp64: return "cl_intel_devicelib_math_fp64"; - case cl_intel_devicelib_complex: + case DeviceLibExt::cl_intel_devicelib_complex: return "cl_intel_devicelib_complex"; - case cl_intel_devicelib_complex_fp64: + case DeviceLibExt::cl_intel_devicelib_complex_fp64: return "cl_intel_devicelib_complex_fp64"; } throw compile_program_error("Unhandled (new?) device library extension", @@ -673,21 +677,27 @@ ProgramManager::getDeviceImage(OSModuleHandle M, KernelSetId KSId, return *Img; } +static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask) { + uint32_t Mask = + 0x1 << (static_cast(Ext) - + static_cast(DeviceLibExt::cl_intel_devicelib_assert)); + return ((DeviceLibReqMask & Mask) == Mask); +} + static std::vector getDeviceLibPrograms(const ContextImplPtr Context, const std::vector &Devices, - std::map &CachedLibPrograms) { + std::map &CachedLibPrograms, + uint32_t DeviceLibReqMask) { std::vector Programs; - // TODO: SYCL compiler should generate a list of required extensions for a - // particular program in order to allow us do a more fine-grained check here. - // Require *all* possible devicelib extensions for now. std::pair RequiredDeviceLibExt[] = { - {cl_intel_devicelib_assert, /* is fallback loaded? */ false}, - {cl_intel_devicelib_math, false}, - {cl_intel_devicelib_math_fp64, false}, - {cl_intel_devicelib_complex, false}, - {cl_intel_devicelib_complex_fp64, false}}; + {DeviceLibExt::cl_intel_devicelib_assert, + /* is fallback loaded? 
*/ false}, + {DeviceLibExt::cl_intel_devicelib_math, false}, + {DeviceLibExt::cl_intel_devicelib_math_fp64, false}, + {DeviceLibExt::cl_intel_devicelib_complex, false}, + {DeviceLibExt::cl_intel_devicelib_complex_fp64, false}}; // Disable all devicelib extensions requiring fp64 support if at least // one underlying device doesn't support cl_khr_fp64. @@ -714,8 +724,11 @@ getDeviceLibPrograms(const ContextImplPtr Context, continue; } - if ((Ext == cl_intel_devicelib_math_fp64 || - Ext == cl_intel_devicelib_complex_fp64) && + if (!isDeviceLibRequired(Ext, DeviceLibReqMask)) { + continue; + } + if ((Ext == DeviceLibExt::cl_intel_devicelib_math_fp64 || + Ext == DeviceLibExt::cl_intel_devicelib_complex_fp64) && !fp64Support) { continue; } @@ -745,7 +758,7 @@ ProgramManager::build(ProgramPtr Program, const ContextImplPtr Context, const string_class &LinkOptions, const std::vector &Devices, std::map &CachedLibPrograms, - bool LinkDeviceLibs) { + uint32_t DeviceLibReqMask) { if (DbgProgMgr > 0) { std::cerr << ">>> ProgramManager::build(" << Program.get() << ", " @@ -753,6 +766,7 @@ ProgramManager::build(ProgramPtr Program, const ContextImplPtr Context, << Devices.size() << ")\n"; } + bool LinkDeviceLibs = (DeviceLibReqMask != 0); const char *CompileOpts = std::getenv("SYCL_PROGRAM_COMPILE_OPTIONS"); if (!CompileOpts) { CompileOpts = CompileOptions.c_str(); @@ -772,14 +786,13 @@ ProgramManager::build(ProgramPtr Program, const ContextImplPtr Context, std::vector LinkPrograms; if (LinkDeviceLibs) { - LinkPrograms = getDeviceLibPrograms(Context, Devices, CachedLibPrograms); + LinkPrograms = getDeviceLibPrograms(Context, Devices, CachedLibPrograms, + DeviceLibReqMask); } const detail::plugin &Plugin = Context->getPlugin(); if (LinkPrograms.empty()) { std::string Opts(CompileOpts); - Opts += " "; - Opts += LinkOpts; RT::PiResult Error = Plugin.call_nocheck( Program.get(), Devices.size(), Devices.data(), Opts.c_str(), nullptr, @@ -972,6 +985,18 @@ void 
ProgramManager::flushSpecConstants(const program_impl &Prg, Prg.flush_spec_constants(*Img, NativePrg); } +// If the kernel is loaded from an spv file, it may not include a DeviceLib +// require mask, so sycl runtime won't know which fallback device libraries +// are needed. In such a case, the safest way is to load all of them. +uint32_t ProgramManager::getDeviceLibReqMask(const RTDeviceBinaryImage &Img) { + const pi::DeviceBinaryImage::PropertyRange &DLMRange = + Img.getDeviceLibReqMask(); + if (DLMRange.isAvailable()) + return pi::DeviceBinaryProperty(*(DLMRange.begin())).asUint32(); + else + return 0xFFFFFFFF; +} + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 721de1971aa0d..5d782df29c36a 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #pragma once - #include #include #include @@ -17,6 +16,7 @@ #include #include +#include #include #include #include @@ -43,9 +43,11 @@ namespace detail { class context_impl; using ContextImplPtr = std::shared_ptr; class program_impl; - -enum DeviceLibExt { - cl_intel_devicelib_assert = 0, +// DeviceLibExt is shared between sycl runtime and sycl-post-link tool.
+// If any update is made here, need to sync with DeviceLibExt definition +// in llvm/tools/sycl-post-link/sycl-post-link.cpp +enum class DeviceLibExt : std::uint32_t { + cl_intel_devicelib_assert, cl_intel_devicelib_math, cl_intel_devicelib_math_fp64, cl_intel_devicelib_complex, @@ -106,6 +108,7 @@ class ProgramManager { void flushSpecConstants(const program_impl &Prg, pi::PiProgram NativePrg = nullptr, const RTDeviceBinaryImage *Img = nullptr); + uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img); private: ProgramManager(); @@ -123,7 +126,7 @@ class ProgramManager { const string_class &LinkOptions, const std::vector &Devices, std::map &CachedLibPrograms, - bool LinkDeviceLibs = false); + uint32_t DeviceLibReqMask); /// Provides a new kernel set id for grouping kernel names together KernelSetId getNextKernelSetId() const; /// Returns the kernel set associated with the kernel, handles some special diff --git a/sycl/test/program_manager/env_vars.cpp b/sycl/test/program_manager/env_vars.cpp index 2602c1d651207..3f884c0765bcc 100644 --- a/sycl/test/program_manager/env_vars.cpp +++ b/sycl/test/program_manager/env_vars.cpp @@ -5,15 +5,12 @@ // // RUN: %CPU_RUN_PLACEHOLDER SYCL_PROGRAM_COMPILE_OPTIONS="-g" %t.out // RUN: %GPU_RUN_PLACEHOLDER SYCL_PROGRAM_COMPILE_OPTIONS="-g" %t.out -// RUN: %CPU_RUN_PLACEHOLDER SYCL_PROGRAM_LINK_OPTIONS="-enable-link-options -cl-denorms-are-zero" %t.out -// RUN: %GPU_RUN_PLACEHOLDER SYCL_PROGRAM_LINK_OPTIONS="-enable-link-options -cl-denorms-are-zero" %t.out // // Now test for invalid options to make sure they are really passed to // a device compiler. Intel GPU runtime doesn't give an error for // invalid options, so we don't test it here. // // RUN: %CPU_RUN_PLACEHOLDER SYCL_PROGRAM_COMPILE_OPTIONS="-enable-link-options -cl-denorms-are-zero" SHOULD_CRASH=1 %t.out -// RUN: %CPU_RUN_PLACEHOLDER SYCL_PROGRAM_LINK_OPTIONS="-g" SHOULD_CRASH=1 %t.out #include #include