diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 219f9fba74551..3c3311438e98a 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -189,6 +189,7 @@ endif() if (SYCL_ENABLE_XPTI_TRACING) set(XPTIFW_LIBS xpti xptifw) + set(SYCL_TOOLS sycl-sanitizer) endif() # SYCL toolchain builds all components: compiler, libraries, headers, etc. @@ -215,6 +216,7 @@ add_custom_target( sycl-toolchain sycl-compiler sycl-ls ${XPTIFW_LIBS} + ${SYCL_TOOLS} COMMENT "Building SYCL compiler toolchain..." ) @@ -277,6 +279,7 @@ set( SYCL_TOOLCHAIN_DEPLOY_COMPONENTS pi_level_zero libsycldevice ${XPTIFW_LIBS} + ${SYCL_TOOLS} ) if(OpenCL_INSTALL_KHRONOS_ICD_LOADER AND TARGET OpenCL-ICD) diff --git a/sycl/include/CL/sycl/detail/usm_impl.hpp b/sycl/include/CL/sycl/detail/usm_impl.hpp index 8be99fede9059..ebe71674f6d5d 100644 --- a/sycl/include/CL/sycl/detail/usm_impl.hpp +++ b/sycl/include/CL/sycl/detail/usm_impl.hpp @@ -7,6 +7,7 @@ // ===--------------------------------------------------------------------=== // #pragma once +#include #include #include @@ -17,13 +18,16 @@ namespace usm { __SYCL_EXPORT void *alignedAlloc(size_t Alignment, size_t Bytes, const context &Ctxt, const device &Dev, - cl::sycl::usm::alloc Kind); + cl::sycl::usm::alloc Kind, + const code_location &CL); __SYCL_EXPORT void *alignedAllocHost(size_t Alignment, size_t Bytes, const context &Ctxt, - cl::sycl::usm::alloc Kind); + cl::sycl::usm::alloc Kind, + const code_location &CL); -__SYCL_EXPORT void free(void *Ptr, const context &Ctxt); +__SYCL_EXPORT void free(void *Ptr, const context &Ctxt, + const code_location &CL); } // namespace usm } // namespace detail diff --git a/sycl/include/CL/sycl/usm.hpp b/sycl/include/CL/sycl/usm.hpp index ba91029144fb5..6b4542e7f2ee0 100644 --- a/sycl/include/CL/sycl/usm.hpp +++ b/sycl/include/CL/sycl/usm.hpp @@ -18,205 +18,269 @@ namespace sycl { /// // Explicit USM /// -__SYCL_EXPORT void *malloc_device(size_t size, const device &dev, - const context &ctxt); -__SYCL_EXPORT void *malloc_device(size_t size, const device &dev, - const context &ctxt, - const property_list &propList); -__SYCL_EXPORT void *malloc_device(size_t size, const queue &q); -__SYCL_EXPORT void *malloc_device(size_t size, const queue &q, - const property_list &propList); - -__SYCL_EXPORT void *aligned_alloc_device(size_t alignment, size_t size, - const device &dev, - const context &ctxt); -__SYCL_EXPORT void *aligned_alloc_device(size_t alignment, size_t size, - const device &dev, const context &ctxt, - const property_list &propList); -__SYCL_EXPORT void *aligned_alloc_device(size_t alignment, size_t size, - const queue &q); -__SYCL_EXPORT void *aligned_alloc_device(size_t alignment, size_t size, - const queue &q, - const property_list &propList); - -__SYCL_EXPORT void free(void *ptr, const context &ctxt); -__SYCL_EXPORT void free(void *ptr, const queue &q); +__SYCL_EXPORT void *malloc_device( + size_t size, const device &dev, const context &ctxt, + const detail::code_location CL = detail::code_location::current()); +__SYCL_EXPORT void *malloc_device( + size_t size, const device &dev, const context &ctxt, + const property_list &propList, + const detail::code_location CL = detail::code_location::current()); +__SYCL_EXPORT void *malloc_device( + size_t size, const queue &q, + const detail::code_location CL = detail::code_location::current()); +__SYCL_EXPORT void *malloc_device( + size_t size, const queue &q, const property_list &propList, + const detail::code_location CL = detail::code_location::current()); + +__SYCL_EXPORT void *aligned_alloc_device( + size_t alignment, size_t size, const device &dev, const context &ctxt, + const detail::code_location CL = detail::code_location::current()); +__SYCL_EXPORT void *aligned_alloc_device( + size_t alignment, size_t size, const device &dev, const context &ctxt, + const property_list &propList, + const detail::code_location CL = detail::code_location::current()); +__SYCL_EXPORT void *aligned_alloc_device( + size_t alignment, size_t size, const queue &q, + const detail::code_location CL = detail::code_location::current()); +__SYCL_EXPORT void *aligned_alloc_device( + size_t alignment, size_t size, const queue &q, + const property_list &propList, + const detail::code_location CL = detail::code_location::current()); + +__SYCL_EXPORT void +free(void *ptr, const context &ctxt, + const detail::code_location CL = detail::code_location::current()); +__SYCL_EXPORT void +free(void *ptr, const queue &q, + const detail::code_location CL = detail::code_location::current()); /// // Restricted USM /// -__SYCL_EXPORT void *malloc_host(size_t size, const context &ctxt); -__SYCL_EXPORT void *malloc_host(size_t size, const context &ctxt, - const property_list &propList); -__SYCL_EXPORT void *malloc_host(size_t size, const queue &q); -__SYCL_EXPORT void *malloc_host(size_t size, const queue &q, - const property_list &propList); - -__SYCL_EXPORT void *malloc_shared(size_t size, const device &dev, - const context &ctxt); -__SYCL_EXPORT void *malloc_shared(size_t size, const device &dev, - const context &ctxt, - const property_list &propList); -__SYCL_EXPORT void *malloc_shared(size_t size, const queue &q); -__SYCL_EXPORT void *malloc_shared(size_t size, const queue &q, - const property_list &propList); - -__SYCL_EXPORT void *aligned_alloc_host(size_t alignment, size_t size, - const context &ctxt); -__SYCL_EXPORT void *aligned_alloc_host(size_t alignment, size_t size, - const context &ctxt, - const property_list &propList); -__SYCL_EXPORT void *aligned_alloc_host(size_t alignment, size_t size, - const queue &q); -__SYCL_EXPORT void *aligned_alloc_host(size_t alignment, size_t size, - const queue &q, - const property_list &propList); - -__SYCL_EXPORT void *aligned_alloc_shared(size_t alignment, size_t size, - const device &dev, - const context &ctxt); -__SYCL_EXPORT void *aligned_alloc_shared(size_t alignment, size_t size, - const device &dev, const context &ctxt, - const property_list &propList); -__SYCL_EXPORT void *aligned_alloc_shared(size_t alignment, size_t size, - const queue &q); -__SYCL_EXPORT void *aligned_alloc_shared(size_t alignment, size_t size, - const queue &q, - const property_list &propList); +__SYCL_EXPORT void * +malloc_host(size_t size, const context &ctxt, + const detail::code_location CL = detail::code_location::current()); +__SYCL_EXPORT void * +malloc_host(size_t size, const context &ctxt, const property_list &propList, + const detail::code_location CL = detail::code_location::current()); +__SYCL_EXPORT void * +malloc_host(size_t size, const queue &q, + const detail::code_location CL = detail::code_location::current()); +__SYCL_EXPORT void * +malloc_host(size_t size, const queue &q, const property_list &propList, + const detail::code_location CL = detail::code_location::current()); + +__SYCL_EXPORT void *malloc_shared( + size_t size, const device &dev, const context &ctxt, + const detail::code_location CL = detail::code_location::current()); +__SYCL_EXPORT void *malloc_shared( + size_t size, const device &dev, const context &ctxt, + const property_list &propList, + const detail::code_location CL = detail::code_location::current()); +__SYCL_EXPORT void *malloc_shared( + size_t size, const queue &q, + const detail::code_location CL = detail::code_location::current()); +__SYCL_EXPORT void *malloc_shared( + size_t size, const queue &q, const property_list &propList, + const detail::code_location CL = detail::code_location::current()); + +__SYCL_EXPORT void *aligned_alloc_host( + size_t alignment, size_t size, const context &ctxt, + const detail::code_location CL = detail::code_location::current()); +__SYCL_EXPORT void *aligned_alloc_host( + size_t alignment, size_t size, const context &ctxt, + const property_list &propList, + const detail::code_location CL = detail::code_location::current()); +__SYCL_EXPORT void *aligned_alloc_host( + size_t alignment, size_t size, const queue &q, + const detail::code_location CL = detail::code_location::current()); +__SYCL_EXPORT void *aligned_alloc_host( + size_t alignment, size_t size, const queue &q, + const property_list &propList, + const detail::code_location CL = detail::code_location::current()); + +__SYCL_EXPORT void *aligned_alloc_shared( + size_t alignment, size_t size, const device &dev, const context &ctxt, + const detail::code_location CL = detail::code_location::current()); +__SYCL_EXPORT void *aligned_alloc_shared( + size_t alignment, size_t size, const device &dev, const context &ctxt, + const property_list &propList, + const detail::code_location CL = detail::code_location::current()); +__SYCL_EXPORT void *aligned_alloc_shared( + size_t alignment, size_t size, const queue &q, + const detail::code_location CL = detail::code_location::current()); +__SYCL_EXPORT void *aligned_alloc_shared( + size_t alignment, size_t size, const queue &q, + const property_list &propList, + const detail::code_location CL = detail::code_location::current()); /// // single form /// -__SYCL_EXPORT void *malloc(size_t size, const device &dev, const context &ctxt, - usm::alloc kind); -__SYCL_EXPORT void *malloc(size_t size, const device &dev, const context &ctxt, - usm::alloc kind, const property_list &propList); -__SYCL_EXPORT void *malloc(size_t size, const queue &q, usm::alloc kind); -__SYCL_EXPORT void *malloc(size_t size, const queue &q, usm::alloc kind, - const property_list &propList); - -__SYCL_EXPORT void *aligned_alloc(size_t alignment, size_t size, - const device &dev, const context &ctxt, - usm::alloc kind); -__SYCL_EXPORT void *aligned_alloc(size_t alignment, size_t size, - const device &dev, const context &ctxt, - usm::alloc kind, - const property_list &propList); -__SYCL_EXPORT void *aligned_alloc(size_t alignment, size_t size, const queue &q, - usm::alloc kind); -__SYCL_EXPORT void *aligned_alloc(size_t alignment, size_t size, const queue &q, - usm::alloc kind, - const property_list &propList); +__SYCL_EXPORT void * +malloc(size_t size, const device &dev, const context &ctxt, usm::alloc kind, + const detail::code_location CL = detail::code_location::current()); +__SYCL_EXPORT void * +malloc(size_t size, const device &dev, const context &ctxt, usm::alloc kind, + const property_list &propList, + const detail::code_location CL = detail::code_location::current()); +__SYCL_EXPORT void * +malloc(size_t size, const queue &q, usm::alloc kind, + const detail::code_location CL = detail::code_location::current()); +__SYCL_EXPORT void * +malloc(size_t size, const queue &q, usm::alloc kind, + const property_list &propList, + const detail::code_location CL = detail::code_location::current()); + +__SYCL_EXPORT void *aligned_alloc( + size_t alignment, size_t size, const device &dev, const context &ctxt, + usm::alloc kind, + const detail::code_location CL = detail::code_location::current()); +__SYCL_EXPORT void *aligned_alloc( + size_t alignment, size_t size, const device &dev, const context &ctxt, + usm::alloc kind, const property_list &propList, + const detail::code_location CL = detail::code_location::current()); +__SYCL_EXPORT void *aligned_alloc( + size_t alignment, size_t size, const queue &q, usm::alloc kind, + const detail::code_location CL = detail::code_location::current()); +__SYCL_EXPORT void *aligned_alloc( + size_t alignment, size_t size, const queue &q, usm::alloc kind, + const property_list &propList, + const detail::code_location CL = detail::code_location::current()); /// // Template forms /// template -T *malloc_device(size_t Count, const device &Dev, const context &Ctxt, - const property_list &PropList = {}) { +T *malloc_device( + size_t Count, const device &Dev, const context &Ctxt, + const property_list &PropList = {}, + const detail::code_location CL = detail::code_location::current()) { return static_cast( - malloc_device(Count * sizeof(T), Dev, Ctxt, PropList)); + malloc_device(Count * sizeof(T), Dev, Ctxt, PropList, CL)); } template -T *malloc_device(size_t Count, const queue &Q, - const property_list &PropList = {}) { - return malloc_device(Count, Q.get_device(), Q.get_context(), PropList); +T *malloc_device( + size_t Count, const queue &Q, const property_list &PropList = {}, + const detail::code_location CL = detail::code_location::current()) { + return malloc_device(Count, Q.get_device(), Q.get_context(), PropList, CL); } template -T *aligned_alloc_device(size_t Alignment, size_t Count, const device &Dev, - const context &Ctxt, - const property_list &PropList = {}) { - return static_cast( - aligned_alloc_device(Alignment, Count * sizeof(T), Dev, Ctxt, PropList)); +T *aligned_alloc_device( + size_t Alignment, size_t Count, const device &Dev, const context &Ctxt, + const property_list &PropList = {}, + const detail::code_location CL = detail::code_location::current()) { + return static_cast(aligned_alloc_device(Alignment, Count * sizeof(T), + Dev, Ctxt, PropList, CL)); } template -T *aligned_alloc_device(size_t Alignment, size_t Count, const queue &Q, - const property_list &PropList = {}) { +T *aligned_alloc_device( + size_t Alignment, size_t Count, const queue &Q, + const property_list &PropList = {}, + const detail::code_location CL = detail::code_location::current()) { return aligned_alloc_device(Alignment, Count, Q.get_device(), - Q.get_context(), PropList); + Q.get_context(), PropList, CL); } template -T *malloc_host(size_t Count, const context &Ctxt, - const property_list &PropList = {}) { - return static_cast(malloc_host(Count * sizeof(T), Ctxt, PropList)); +T *malloc_host( + size_t Count, const context &Ctxt, const property_list &PropList = {}, + const detail::code_location CL = detail::code_location::current()) { + return static_cast(malloc_host(Count * sizeof(T), Ctxt, PropList, CL)); } template -T *malloc_host(size_t Count, const queue &Q, - const property_list &PropList = {}) { - return malloc_host(Count, Q.get_context(), PropList); +T *malloc_host( + size_t Count, const queue &Q, const property_list &PropList = {}, + const detail::code_location CL = detail::code_location::current()) { + return malloc_host(Count, Q.get_context(), PropList, CL); } template -T *malloc_shared(size_t Count, const device &Dev, const context &Ctxt, - const property_list &PropList = {}) { +T *malloc_shared( + size_t Count, const device &Dev, const context &Ctxt, + const property_list &PropList = {}, + const detail::code_location CL = detail::code_location::current()) { return static_cast( - malloc_shared(Count * sizeof(T), Dev, Ctxt, PropList)); + malloc_shared(Count * sizeof(T), Dev, Ctxt, PropList, CL)); } template -T *malloc_shared(size_t Count, const queue &Q, - const property_list &PropList = {}) { - return malloc_shared(Count, Q.get_device(), Q.get_context(), PropList); +T *malloc_shared( + size_t Count, const queue &Q, const property_list &PropList = {}, + const detail::code_location CL = detail::code_location::current()) { + return malloc_shared(Count, Q.get_device(), Q.get_context(), PropList, CL); } template -T *aligned_alloc_host(size_t Alignment, size_t Count, const context &Ctxt, - const property_list &PropList = {}) { +T *aligned_alloc_host( + size_t Alignment, size_t Count, const context &Ctxt, + const property_list &PropList = {}, + const detail::code_location CL = detail::code_location::current()) { return static_cast( - aligned_alloc_host(Alignment, Count * sizeof(T), Ctxt, PropList)); + aligned_alloc_host(Alignment, Count * sizeof(T), Ctxt, PropList, CL)); } template -T *aligned_alloc_host(size_t Alignment, size_t Count, const queue &Q, - const property_list &PropList = {}) { - return aligned_alloc_host(Alignment, Count, Q.get_context(), PropList); +T *aligned_alloc_host( + size_t Alignment, size_t Count, const queue &Q, + const property_list &PropList = {}, + const detail::code_location CL = detail::code_location::current()) { + return aligned_alloc_host(Alignment, Count, Q.get_context(), PropList, CL); } template -T *aligned_alloc_shared(size_t Alignment, size_t Count, const device &Dev, - const context &Ctxt, - const property_list &PropList = {}) { - return static_cast( - aligned_alloc_shared(Alignment, Count * sizeof(T), Dev, Ctxt, PropList)); +T *aligned_alloc_shared( + size_t Alignment, size_t Count, const device &Dev, const context &Ctxt, + const property_list &PropList = {}, + const detail::code_location CL = detail::code_location::current()) { + return static_cast(aligned_alloc_shared(Alignment, Count * sizeof(T), + Dev, Ctxt, PropList, CL)); } template -T *aligned_alloc_shared(size_t Alignment, size_t Count, const queue &Q, - const property_list &PropList = {}) { +T *aligned_alloc_shared( + size_t Alignment, size_t Count, const queue &Q, + const property_list &PropList = {}, + const detail::code_location CL = detail::code_location::current()) { return aligned_alloc_shared(Alignment, Count, Q.get_device(), - Q.get_context(), PropList); + Q.get_context(), PropList, CL); } template T *malloc(size_t Count, const device &Dev, const context &Ctxt, usm::alloc Kind, - const property_list &PropList = {}) { - return static_cast(malloc(Count * sizeof(T), Dev, Ctxt, Kind, PropList)); + const property_list &PropList = {}, + const detail::code_location CL = detail::code_location::current()) { + return static_cast( + malloc(Count * sizeof(T), Dev, Ctxt, Kind, PropList, CL)); } template T *malloc(size_t Count, const queue &Q, usm::alloc Kind, - const property_list &PropList = {}) { - return malloc(Count, Q.get_device(), Q.get_context(), Kind, PropList); + const property_list &PropList = {}, + const detail::code_location CL = detail::code_location::current()) { + return malloc(Count, Q.get_device(), Q.get_context(), Kind, PropList, CL); } template -T *aligned_alloc(size_t Alignment, size_t Count, const device &Dev, - const context &Ctxt, usm::alloc Kind, - const property_list &PropList = {}) { - return static_cast( - aligned_alloc(Alignment, Count * sizeof(T), Dev, Ctxt, Kind, PropList)); +T *aligned_alloc( + size_t Alignment, size_t Count, const device &Dev, const context &Ctxt, + usm::alloc Kind, const property_list &PropList = {}, + const detail::code_location CL = detail::code_location::current()) { + return static_cast(aligned_alloc(Alignment, Count * sizeof(T), Dev, Ctxt, + Kind, PropList, CL)); } template -T *aligned_alloc(size_t Alignment, size_t Count, const queue &Q, - usm::alloc Kind, const property_list &PropList = {}) { +T *aligned_alloc( + size_t Alignment, size_t Count, const queue &Q, usm::alloc Kind, + const property_list &PropList = {}, + const detail::code_location CL = detail::code_location::current()) { return aligned_alloc(Alignment, Count, Q.get_device(), Q.get_context(), - Kind, PropList); + Kind, PropList, CL); } // Pointer queries diff --git a/sycl/include/CL/sycl/usm/usm_allocator.hpp b/sycl/include/CL/sycl/usm/usm_allocator.hpp index 9bcc63d521e13..6fff25ddbded5 100644 --- a/sycl/include/CL/sycl/usm/usm_allocator.hpp +++ b/sycl/include/CL/sycl/usm/usm_allocator.hpp @@ -8,6 +8,7 @@ #pragma once #include +#include #include #include #include @@ -24,8 +25,10 @@ namespace sycl { __SYCL_EXPORT void *aligned_alloc(size_t alignment, size_t size, const device &dev, const context &ctxt, usm::alloc kind, - const property_list &propList); -__SYCL_EXPORT void free(void *ptr, const context &ctxt); + const property_list &propList, + const detail::code_location CL); +__SYCL_EXPORT void free(void *ptr, const context &ctxt, + const detail::code_location CL); template class usm_allocator { @@ -74,11 +77,12 @@ class usm_allocator { /// Allocates memory. /// /// \param NumberOfElements is a count of elements to allocate memory for. - T *allocate(size_t NumberOfElements) { + T *allocate(size_t NumberOfElements, const detail::code_location CL = + detail::code_location::current()) { auto Result = reinterpret_cast( aligned_alloc(getAlignment(), NumberOfElements * sizeof(value_type), - MDevice, MContext, AllocKind, MPropList)); + MDevice, MContext, AllocKind, MPropList, CL)); if (!Result) { throw memory_allocation_error(); } @@ -89,9 +93,11 @@ class usm_allocator { /// /// \param Ptr is a pointer to memory being deallocated. /// \param Size is a number of elements previously passed to allocate. - void deallocate(T *Ptr, size_t) { + void deallocate( + T *Ptr, size_t, + const detail::code_location CL = detail::code_location::current()) { if (Ptr) { - free(Ptr, MContext); + free(Ptr, MContext, CL); } } diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp index 54d530c5ca3cd..28ba7c36b9dd3 100644 --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -15,6 +15,25 @@ #include #include +#include + +#ifdef XPTI_ENABLE_INSTRUMENTATION +// Include the headers necessary for emitting +// traces using the trace framework +#include "xpti/xpti_trace_framework.hpp" + +#define XPTI_CREATE_TRACEPOINT(CL) \ + std::unique_ptr _TP(nullptr); \ + if (xptiTraceEnabled()) { \ + xpti::payload_t Payload{CL.functionName(), CL.fileName(), \ + static_cast(CL.lineNumber()), \ + static_cast(CL.columnNumber()), nullptr}; \ + _TP = std::make_unique(&Payload); \ + } \ + (void)_TP; +#else +#define XPTI_CREATE_TRACEPOINT(CL) +#endif __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { @@ -25,7 +44,8 @@ namespace detail { namespace usm { void *alignedAllocHost(size_t Alignment, size_t Size, const context &Ctxt, - alloc Kind) { + alloc Kind, const detail::code_location &CL) { + XPTI_CREATE_TRACEPOINT(CL); void *RetVal = nullptr; if (Size == 0) return nullptr; @@ -72,7 +92,9 @@ void *alignedAllocHost(size_t Alignment, size_t Size, const context &Ctxt, } void *alignedAlloc(size_t Alignment, size_t Size, const context &Ctxt, - const device &Dev, alloc Kind) { + const device &Dev, alloc Kind, + const detail::code_location &CL) { + XPTI_CREATE_TRACEPOINT(CL); void *RetVal = nullptr; if (Size == 0) return nullptr; @@ -129,7 +151,8 @@ void *alignedAlloc(size_t Alignment, size_t Size, const context &Ctxt, return RetVal; } -void free(void *Ptr, const context &Ctxt) { +void free(void *Ptr, const context &Ctxt, const detail::code_location &CL) { + XPTI_CREATE_TRACEPOINT(CL); if (Ptr == nullptr) return; if (Ctxt.is_host()) { @@ -143,182 +166,231 @@ void free(void *Ptr, const context &Ctxt) { } } +// For ABI compatibility +__SYCL_EXPORT void *alignedAllocHost(size_t Alignment, size_t Size, + const context &Ctxt, alloc Kind) { + return alignedAllocHost(Alignment, Size, Ctxt, Kind, detail::code_location{}); +} + +__SYCL_EXPORT void free(void *Ptr, const context &Ctxt) { + detail::usm::free(Ptr, Ctxt, detail::code_location{}); +} + +__SYCL_EXPORT void *alignedAlloc(size_t Alignment, size_t Size, + const context &Ctxt, const device &Dev, + alloc Kind) { + return alignedAlloc(Alignment, Size, Ctxt, Dev, Kind, + detail::code_location{}); +} + } // namespace usm } // namespace detail -void *malloc_device(size_t Size, const device &Dev, const context &Ctxt) { - return detail::usm::alignedAlloc(0, Size, Ctxt, Dev, alloc::device); +void *malloc_device(size_t Size, const device &Dev, const context &Ctxt, + const detail::code_location CL) { + return detail::usm::alignedAlloc(0, Size, Ctxt, Dev, alloc::device, CL); } void *malloc_device(size_t Size, const device &Dev, const context &Ctxt, - const property_list &) { - return malloc_device(Size, Dev, Ctxt); + const property_list &, const detail::code_location CL) { + return malloc_device(Size, Dev, Ctxt, CL); } -void *malloc_device(size_t Size, const queue &Q) { - return malloc_device(Size, Q.get_device(), Q.get_context()); +void *malloc_device(size_t Size, const queue &Q, + const detail::code_location CL) { + return malloc_device(Size, Q.get_device(), Q.get_context(), CL); } -void *malloc_device(size_t Size, const queue &Q, - const property_list &PropList) { - return malloc_device(Size, Q.get_device(), Q.get_context(), PropList); +void *malloc_device(size_t Size, const queue &Q, const property_list &PropList, + const detail::code_location CL) { + return malloc_device(Size, Q.get_device(), Q.get_context(), PropList, CL); } void *aligned_alloc_device(size_t Alignment, size_t Size, const device &Dev, - const context &Ctxt) { - return detail::usm::alignedAlloc(Alignment, Size, Ctxt, Dev, alloc::device); + const context &Ctxt, + const detail::code_location CL) { + return detail::usm::alignedAlloc(Alignment, Size, Ctxt, Dev, alloc::device, + CL); } void *aligned_alloc_device(size_t Alignment, size_t Size, const device &Dev, - const context &Ctxt, const property_list &) { - return aligned_alloc_device(Alignment, Size, Dev, Ctxt); + const context &Ctxt, const property_list &, + const detail::code_location CL) { + return aligned_alloc_device(Alignment, Size, Dev, Ctxt, CL); } -void *aligned_alloc_device(size_t Alignment, size_t Size, const queue &Q) { - return aligned_alloc_device(Alignment, Size, Q.get_device(), Q.get_context()); +void *aligned_alloc_device(size_t Alignment, size_t Size, const queue &Q, + const detail::code_location CL) { + return aligned_alloc_device(Alignment, Size, Q.get_device(), Q.get_context(), + CL); } void *aligned_alloc_device(size_t Alignment, size_t Size, const queue &Q, - const property_list &PropList) { + const property_list &PropList, + const detail::code_location CL) { return aligned_alloc_device(Alignment, Size, Q.get_device(), Q.get_context(), - PropList); + PropList, CL); } -void free(void *ptr, const context &Ctxt) { - return detail::usm::free(ptr, Ctxt); +void free(void *ptr, const context &Ctxt, const detail::code_location CL) { + return detail::usm::free(ptr, Ctxt, CL); } -void free(void *ptr, const queue &Q) { return free(ptr, Q.get_context()); } +void free(void *ptr, const queue &Q, const detail::code_location CL) { + return free(ptr, Q.get_context(), CL); +} /// // Restricted USM /// -void *malloc_host(size_t Size, const context &Ctxt) { - return detail::usm::alignedAllocHost(0, Size, Ctxt, alloc::host); +void *malloc_host(size_t Size, const context &Ctxt, + const detail::code_location CL) { + return detail::usm::alignedAllocHost(0, Size, Ctxt, alloc::host, CL); } -void *malloc_host(size_t Size, const context &Ctxt, const property_list &) { - return malloc_host(Size, Ctxt); +void *malloc_host(size_t Size, const context &Ctxt, const property_list &, + const detail::code_location CL) { + return malloc_host(Size, Ctxt, CL); } -void *malloc_host(size_t Size, const queue &Q) { - return malloc_host(Size, Q.get_context()); +void *malloc_host(size_t Size, const queue &Q, const detail::code_location CL) { + return malloc_host(Size, Q.get_context(), CL); } -void *malloc_host(size_t Size, const queue &Q, const property_list &PropList) { - return malloc_host(Size, Q.get_context(), PropList); +void *malloc_host(size_t Size, const queue &Q, const property_list &PropList, + const detail::code_location CL) { + return malloc_host(Size, Q.get_context(), PropList, CL); } -void *malloc_shared(size_t Size, const device &Dev, const context &Ctxt) { - return detail::usm::alignedAlloc(0, Size, Ctxt, Dev, alloc::shared); +void *malloc_shared(size_t Size, const device &Dev, const context &Ctxt, + const detail::code_location CL) { + return detail::usm::alignedAlloc(0, Size, Ctxt, Dev, alloc::shared, CL); } void *malloc_shared(size_t Size, const device &Dev, const context &Ctxt, - const property_list &) { - return malloc_shared(Size, Dev, Ctxt); + const property_list &, const detail::code_location CL) { + return malloc_shared(Size, Dev, Ctxt, CL); } -void *malloc_shared(size_t Size, const queue &Q) { - return malloc_shared(Size, Q.get_device(), Q.get_context()); +void *malloc_shared(size_t Size, const queue &Q, + const detail::code_location CL) { + return malloc_shared(Size, Q.get_device(), Q.get_context(), CL); } -void *malloc_shared(size_t Size, const queue &Q, - const property_list &PropList) { - return malloc_shared(Size, Q.get_device(), Q.get_context(), PropList); +void *malloc_shared(size_t Size, const queue &Q, const property_list &PropList, + const detail::code_location CL) { + return malloc_shared(Size, Q.get_device(), Q.get_context(), PropList, CL); } -void *aligned_alloc_host(size_t Alignment, size_t Size, const context &Ctxt) { - return detail::usm::alignedAllocHost(Alignment, Size, Ctxt, alloc::host); +void *aligned_alloc_host(size_t Alignment, size_t Size, const context &Ctxt, + const detail::code_location CL) { + return detail::usm::alignedAllocHost(Alignment, Size, Ctxt, alloc::host, CL); } void *aligned_alloc_host(size_t Alignment, size_t Size, const context &Ctxt, - const property_list &) { - return aligned_alloc_host(Alignment, Size, Ctxt); + const property_list &, + const detail::code_location CL) { + return aligned_alloc_host(Alignment, Size, Ctxt, CL); } -void *aligned_alloc_host(size_t Alignment, size_t Size, const queue &Q) { - return aligned_alloc_host(Alignment, Size, Q.get_context()); +void *aligned_alloc_host(size_t Alignment, size_t Size, const queue &Q, + const detail::code_location CL) { + return aligned_alloc_host(Alignment, Size, Q.get_context(), CL); } void *aligned_alloc_host(size_t Alignment, size_t Size, const queue &Q, - const property_list &PropList) { - return aligned_alloc_host(Alignment, Size, Q.get_context(), PropList); + const property_list &PropList, + const detail::code_location CL) { + return aligned_alloc_host(Alignment, Size, Q.get_context(), PropList, CL); } void *aligned_alloc_shared(size_t Alignment, size_t Size, const device &Dev, - const context &Ctxt) { - return detail::usm::alignedAlloc(Alignment, Size, Ctxt, Dev, alloc::shared); + const context &Ctxt, + const detail::code_location CL) { + return detail::usm::alignedAlloc(Alignment, Size, Ctxt, Dev, alloc::shared, + CL); } void *aligned_alloc_shared(size_t Alignment, size_t Size, const device &Dev, - const context &Ctxt, const property_list &) { - return aligned_alloc_shared(Alignment, Size, Dev, Ctxt); + const context &Ctxt, const property_list &, + const detail::code_location CL) { + return aligned_alloc_shared(Alignment, Size, Dev, Ctxt, CL); } -void *aligned_alloc_shared(size_t Alignment, size_t Size, const queue &Q) { - return aligned_alloc_shared(Alignment, Size, Q.get_device(), Q.get_context()); +void *aligned_alloc_shared(size_t Alignment, size_t Size, const queue &Q, + const detail::code_location CL) { + return aligned_alloc_shared(Alignment, Size, Q.get_device(), Q.get_context(), + CL); } void *aligned_alloc_shared(size_t Alignment, size_t Size, const queue &Q, - const property_list &PropList) { + const property_list &PropList, + const detail::code_location CL) { return aligned_alloc_shared(Alignment, Size, Q.get_device(), Q.get_context(), - PropList); + PropList, CL); } // single form -void *malloc(size_t Size, const device &Dev, const context &Ctxt, alloc Kind) { +void *malloc(size_t Size, const device &Dev, const context &Ctxt, alloc Kind, + const detail::code_location CL) { void *RetVal = nullptr; if (Kind == alloc::host) { - RetVal = detail::usm::alignedAllocHost(0, Size, Ctxt, Kind); + RetVal = detail::usm::alignedAllocHost(0, Size, Ctxt, Kind, CL); } else { - RetVal = detail::usm::alignedAlloc(0, Size, Ctxt, Dev, Kind); + RetVal = detail::usm::alignedAlloc(0, Size, Ctxt, Dev, Kind, CL); } return RetVal; } void *malloc(size_t Size, const device &Dev, const context &Ctxt, alloc Kind, - const property_list &) { - return malloc(Size, Dev, Ctxt, Kind); + const property_list &, const detail::code_location CL) { + return malloc(Size, Dev, Ctxt, Kind, CL); } -void *malloc(size_t Size, const queue &Q, alloc Kind) { - return malloc(Size, Q.get_device(), Q.get_context(), Kind); +void *malloc(size_t Size, const queue &Q, alloc Kind, + const detail::code_location CL) { + return malloc(Size, Q.get_device(), Q.get_context(), Kind, CL); } void *malloc(size_t Size, const queue &Q, alloc Kind, - const property_list &PropList) { - return malloc(Size, Q.get_device(), Q.get_context(), Kind, PropList); + const property_list &PropList, const detail::code_location CL) { + return malloc(Size, Q.get_device(), Q.get_context(), Kind, PropList, CL); } void *aligned_alloc(size_t Alignment, size_t Size, const device &Dev, - const context &Ctxt, alloc Kind) { + const context &Ctxt, alloc Kind, + const detail::code_location CL) { void *RetVal = nullptr; if (Kind == alloc::host) { - RetVal = detail::usm::alignedAllocHost(Alignment, Size, Ctxt, Kind); + RetVal = detail::usm::alignedAllocHost(Alignment, Size, Ctxt, Kind, CL); } else { - RetVal = detail::usm::alignedAlloc(Alignment, Size, Ctxt, Dev, Kind); + RetVal = detail::usm::alignedAlloc(Alignment, Size, Ctxt, Dev, Kind, CL); } return RetVal; } void *aligned_alloc(size_t Alignment, size_t Size, const device &Dev, - const context &Ctxt, alloc Kind, const property_list &) { - return aligned_alloc(Alignment, Size, Dev, Ctxt, Kind); + const context &Ctxt, alloc Kind, const property_list &, + const detail::code_location CL) { + return aligned_alloc(Alignment, Size, Dev, Ctxt, Kind, CL); } -void *aligned_alloc(size_t Alignment, size_t Size, const queue &Q, alloc Kind) { - return aligned_alloc(Alignment, Size, Q.get_device(), Q.get_context(), Kind); +void *aligned_alloc(size_t Alignment, size_t Size, const queue &Q, alloc Kind, + const detail::code_location CL) { + return aligned_alloc(Alignment, Size, Q.get_device(), Q.get_context(), Kind, + CL); } void *aligned_alloc(size_t Alignment, size_t Size, const queue &Q, alloc Kind, - const property_list &PropList) { + const property_list &PropList, + const detail::code_location CL) { return aligned_alloc(Alignment, Size, Q.get_device(), Q.get_context(), Kind, - PropList); + PropList, CL); } // Pointer queries @@ -416,5 +488,227 @@ device get_pointer_device(const void *Ptr, const context &Ctxt) { PI_INVALID_OPERATION); } +// For ABI compatibility + +__SYCL_EXPORT void *malloc_device(size_t Size, const device &Dev, + const context &Ctxt) { + return detail::usm::alignedAlloc(0, Size, Ctxt, Dev, alloc::device, + detail::code_location{}); +} + +__SYCL_EXPORT void *malloc_device(size_t Size, const device &Dev, + const context &Ctxt, const property_list &) { + return malloc_device(Size, Dev, Ctxt, detail::code_location{}); +} + +__SYCL_EXPORT void *malloc_device(size_t Size, const queue &Q) { + return malloc_device(Size, Q.get_device(), Q.get_context(), + detail::code_location{}); +} + +__SYCL_EXPORT void *malloc_device(size_t Size, const queue &Q, + const property_list &PropList) { + return malloc_device(Size, Q.get_device(), Q.get_context(), PropList, + detail::code_location{}); +} + +__SYCL_EXPORT void *aligned_alloc_device(size_t Alignment, size_t Size, + const device &Dev, + const context &Ctxt) { + return detail::usm::alignedAlloc(Alignment, Size, Ctxt, Dev, alloc::device, + detail::code_location{}); +} + +__SYCL_EXPORT void *aligned_alloc_device(size_t Alignment, size_t Size, + const device &Dev, const context &Ctxt, + const property_list &) { + return aligned_alloc_device(Alignment, Size, Dev, Ctxt, + detail::code_location{}); +} + +__SYCL_EXPORT void *aligned_alloc_device(size_t Alignment, size_t Size, + const queue &Q) { + return aligned_alloc_device(Alignment, Size, Q.get_device(), Q.get_context(), + detail::code_location{}); +} + +__SYCL_EXPORT void *aligned_alloc_device(size_t Alignment, size_t Size, + const queue &Q, + const property_list &PropList) { + return aligned_alloc_device(Alignment, Size, Q.get_device(), Q.get_context(), + PropList, detail::code_location{}); +} + +__SYCL_EXPORT void free(void *ptr, const context &Ctxt) { + return detail::usm::free(ptr, Ctxt, detail::code_location{}); +} + +__SYCL_EXPORT void free(void *ptr, const queue &Q) { + return free(ptr, Q.get_context(), detail::code_location{}); +} + +/// +// Restricted USM +/// +__SYCL_EXPORT void *malloc_host(size_t Size, const context &Ctxt) { + return detail::usm::alignedAllocHost(0, Size, Ctxt, alloc::host, + detail::code_location{}); +} + +__SYCL_EXPORT void *malloc_host(size_t Size, const context &Ctxt, + const property_list &) { + return malloc_host(Size, Ctxt, detail::code_location{}); +} + +__SYCL_EXPORT void *malloc_host(size_t Size, const queue &Q) { + return malloc_host(Size, Q.get_context(), detail::code_location{}); +} + +__SYCL_EXPORT void *malloc_host(size_t Size, const queue &Q, + const property_list &PropList) { + return malloc_host(Size, Q.get_context(), PropList, detail::code_location{}); +} + +__SYCL_EXPORT void *malloc_shared(size_t Size, const device &Dev, + const context &Ctxt) { + return detail::usm::alignedAlloc(0, Size, Ctxt, Dev, alloc::shared, + detail::code_location{}); +} + +__SYCL_EXPORT void *malloc_shared(size_t Size, const device &Dev, + const context &Ctxt, const property_list &) { + return malloc_shared(Size, Dev, Ctxt, detail::code_location{}); +} + +__SYCL_EXPORT void *malloc_shared(size_t Size, const queue &Q) { + return malloc_shared(Size, Q.get_device(), Q.get_context(), + detail::code_location{}); +} + +__SYCL_EXPORT void *malloc_shared(size_t Size, const queue &Q, + const property_list &PropList) { + return malloc_shared(Size, Q.get_device(), Q.get_context(), PropList, + detail::code_location{}); +} + +__SYCL_EXPORT void *aligned_alloc_host(size_t Alignment, size_t Size, + const context &Ctxt) { + return detail::usm::alignedAllocHost(Alignment, Size, Ctxt, alloc::host, + detail::code_location{}); +} + +__SYCL_EXPORT void *aligned_alloc_host(size_t Alignment, size_t Size, + const context &Ctxt, + const property_list &) { + return aligned_alloc_host(Alignment, Size, Ctxt, detail::code_location{}); +} + +__SYCL_EXPORT void *aligned_alloc_host(size_t Alignment, size_t Size, + const queue &Q) { + return aligned_alloc_host(Alignment, Size, Q.get_context(), + detail::code_location{}); +} + +__SYCL_EXPORT void *aligned_alloc_host(size_t Alignment, size_t Size, + const queue &Q, + const property_list &PropList) { + return aligned_alloc_host(Alignment, Size, Q.get_context(), PropList, + detail::code_location{}); +} + +__SYCL_EXPORT void *aligned_alloc_shared(size_t Alignment, size_t Size, + const device &Dev, + const context &Ctxt) { + return detail::usm::alignedAlloc(Alignment, Size, Ctxt, Dev, alloc::shared, + detail::code_location{}); +} + +__SYCL_EXPORT void *aligned_alloc_shared(size_t Alignment, size_t Size, + const device &Dev, const context &Ctxt, + const property_list &) { + return aligned_alloc_shared(Alignment, Size, Dev, Ctxt, + detail::code_location{}); +} + +__SYCL_EXPORT void *aligned_alloc_shared(size_t Alignment, size_t Size, + const queue &Q) { + return aligned_alloc_shared(Alignment, Size, Q.get_device(), Q.get_context(), + detail::code_location{}); +} + +__SYCL_EXPORT void *aligned_alloc_shared(size_t Alignment, size_t Size, + const queue &Q, + const property_list &PropList) { + return aligned_alloc_shared(Alignment, Size, Q.get_device(), Q.get_context(), + PropList, detail::code_location{}); +} + +// single form + +__SYCL_EXPORT void *malloc(size_t Size, const device &Dev, const context &Ctxt, + alloc Kind) { + void *RetVal = nullptr; + + if (Kind == alloc::host) { + RetVal = detail::usm::alignedAllocHost(0, Size, Ctxt, Kind, + detail::code_location{}); + } else { + RetVal = detail::usm::alignedAlloc(0, Size, Ctxt, Dev, Kind, + detail::code_location{}); + } + + return RetVal; +} + +__SYCL_EXPORT void *malloc(size_t Size, const device &Dev, const context &Ctxt, + alloc Kind, const property_list &) { + return malloc(Size, Dev, Ctxt, Kind, detail::code_location{}); +} + +__SYCL_EXPORT void *malloc(size_t Size, const queue &Q, alloc Kind) { + return malloc(Size, Q.get_device(), Q.get_context(), Kind, + detail::code_location{}); +} + +__SYCL_EXPORT void *malloc(size_t Size, const queue &Q, alloc Kind, + const property_list &PropList) { + return malloc(Size, Q.get_device(), Q.get_context(), Kind, PropList, + detail::code_location{}); +} + +__SYCL_EXPORT void *aligned_alloc(size_t Alignment, size_t Size, + const device &Dev, const context &Ctxt, + alloc Kind) { + void *RetVal = nullptr; + + if (Kind == alloc::host) { + RetVal = detail::usm::alignedAllocHost(Alignment, Size, Ctxt, Kind, + detail::code_location{}); + } else { + RetVal = detail::usm::alignedAlloc(Alignment, Size, Ctxt, Dev, Kind, + detail::code_location{}); + } + + return RetVal; +} + +__SYCL_EXPORT void *aligned_alloc(size_t Alignment, size_t Size, + const device &Dev, const context &Ctxt, + alloc Kind, const property_list &) { + return aligned_alloc(Alignment, Size, Dev, Ctxt, Kind, + detail::code_location{}); +} + +__SYCL_EXPORT void *aligned_alloc(size_t Alignment, size_t Size, const queue &Q, + alloc Kind) { + return aligned_alloc(Alignment, Size, Q.get_device(), Q.get_context(), Kind, + detail::code_location{}); +} + +__SYCL_EXPORT void *aligned_alloc(size_t Alignment, size_t Size, const queue &Q, + alloc Kind, const property_list &PropList) { + return aligned_alloc(Alignment, Size, Q.get_device(), Q.get_context(), Kind, + PropList, detail::code_location{}); +} } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 04d8c1cefabbe..0b554d694bf5a 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3599,21 +3599,37 @@ _ZN2cl4sycl10level_zero12make_contextERKSt6vectorINS0_6deviceESaIS3_EEmb _ZN2cl4sycl10level_zero12make_programERKNS0_7contextEm _ZN2cl4sycl10level_zero13make_platformEm _ZN2cl4sycl11malloc_hostEmRKNS0_5queueE +_ZN2cl4sycl11malloc_hostEmRKNS0_5queueENS0_6detail13code_locationE _ZN2cl4sycl11malloc_hostEmRKNS0_5queueERKNS0_13property_listE +_ZN2cl4sycl11malloc_hostEmRKNS0_5queueERKNS0_13property_listENS0_6detail13code_locationE _ZN2cl4sycl11malloc_hostEmRKNS0_7contextE +_ZN2cl4sycl11malloc_hostEmRKNS0_7contextENS0_6detail13code_locationE _ZN2cl4sycl11malloc_hostEmRKNS0_7contextERKNS0_13property_listE +_ZN2cl4sycl11malloc_hostEmRKNS0_7contextERKNS0_13property_listENS0_6detail13code_locationE _ZN2cl4sycl13aligned_allocEmmRKNS0_5queueENS0_3usm5allocE +_ZN2cl4sycl13aligned_allocEmmRKNS0_5queueENS0_3usm5allocENS0_6detail13code_locationE _ZN2cl4sycl13aligned_allocEmmRKNS0_5queueENS0_3usm5allocERKNS0_13property_listE +_ZN2cl4sycl13aligned_allocEmmRKNS0_5queueENS0_3usm5allocERKNS0_13property_listENS0_6detail13code_locationE _ZN2cl4sycl13aligned_allocEmmRKNS0_6deviceERKNS0_7contextENS0_3usm5allocE +_ZN2cl4sycl13aligned_allocEmmRKNS0_6deviceERKNS0_7contextENS0_3usm5allocENS0_6detail13code_locationE _ZN2cl4sycl13aligned_allocEmmRKNS0_6deviceERKNS0_7contextENS0_3usm5allocERKNS0_13property_listE +_ZN2cl4sycl13aligned_allocEmmRKNS0_6deviceERKNS0_7contextENS0_3usm5allocERKNS0_13property_listENS0_6detail13code_locationE _ZN2cl4sycl13malloc_deviceEmRKNS0_5queueE +_ZN2cl4sycl13malloc_deviceEmRKNS0_5queueENS0_6detail13code_locationE _ZN2cl4sycl13malloc_deviceEmRKNS0_5queueERKNS0_13property_listE +_ZN2cl4sycl13malloc_deviceEmRKNS0_5queueERKNS0_13property_listENS0_6detail13code_locationE _ZN2cl4sycl13malloc_deviceEmRKNS0_6deviceERKNS0_7contextE +_ZN2cl4sycl13malloc_deviceEmRKNS0_6deviceERKNS0_7contextENS0_6detail13code_locationE _ZN2cl4sycl13malloc_deviceEmRKNS0_6deviceERKNS0_7contextERKNS0_13property_listE +_ZN2cl4sycl13malloc_deviceEmRKNS0_6deviceERKNS0_7contextERKNS0_13property_listENS0_6detail13code_locationE _ZN2cl4sycl13malloc_sharedEmRKNS0_5queueE +_ZN2cl4sycl13malloc_sharedEmRKNS0_5queueENS0_6detail13code_locationE _ZN2cl4sycl13malloc_sharedEmRKNS0_5queueERKNS0_13property_listE +_ZN2cl4sycl13malloc_sharedEmRKNS0_5queueERKNS0_13property_listENS0_6detail13code_locationE _ZN2cl4sycl13malloc_sharedEmRKNS0_6deviceERKNS0_7contextE +_ZN2cl4sycl13malloc_sharedEmRKNS0_6deviceERKNS0_7contextENS0_6detail13code_locationE _ZN2cl4sycl13malloc_sharedEmRKNS0_6deviceERKNS0_7contextERKNS0_13property_listE +_ZN2cl4sycl13malloc_sharedEmRKNS0_6deviceERKNS0_7contextERKNS0_13property_listENS0_6detail13code_locationE _ZN2cl4sycl13sycl_categoryEv _ZN2cl4sycl14exception_list5ClearEv _ZN2cl4sycl14exception_list8PushBackEONSt15__exception_ptr13exception_ptrE @@ -3622,18 +3638,30 @@ _ZN2cl4sycl14get_kernel_idsEv _ZN2cl4sycl15make_error_codeENS0_4errcE _ZN2cl4sycl16get_pointer_typeEPKvRKNS0_7contextE _ZN2cl4sycl18aligned_alloc_hostEmmRKNS0_5queueE +_ZN2cl4sycl18aligned_alloc_hostEmmRKNS0_5queueENS0_6detail13code_locationE _ZN2cl4sycl18aligned_alloc_hostEmmRKNS0_5queueERKNS0_13property_listE +_ZN2cl4sycl18aligned_alloc_hostEmmRKNS0_5queueERKNS0_13property_listENS0_6detail13code_locationE _ZN2cl4sycl18aligned_alloc_hostEmmRKNS0_7contextE +_ZN2cl4sycl18aligned_alloc_hostEmmRKNS0_7contextENS0_6detail13code_locationE _ZN2cl4sycl18aligned_alloc_hostEmmRKNS0_7contextERKNS0_13property_listE +_ZN2cl4sycl18aligned_alloc_hostEmmRKNS0_7contextERKNS0_13property_listENS0_6detail13code_locationE _ZN2cl4sycl18get_pointer_deviceEPKvRKNS0_7contextE _ZN2cl4sycl20aligned_alloc_deviceEmmRKNS0_5queueE +_ZN2cl4sycl20aligned_alloc_deviceEmmRKNS0_5queueENS0_6detail13code_locationE _ZN2cl4sycl20aligned_alloc_deviceEmmRKNS0_5queueERKNS0_13property_listE +_ZN2cl4sycl20aligned_alloc_deviceEmmRKNS0_5queueERKNS0_13property_listENS0_6detail13code_locationE _ZN2cl4sycl20aligned_alloc_deviceEmmRKNS0_6deviceERKNS0_7contextE +_ZN2cl4sycl20aligned_alloc_deviceEmmRKNS0_6deviceERKNS0_7contextENS0_6detail13code_locationE _ZN2cl4sycl20aligned_alloc_deviceEmmRKNS0_6deviceERKNS0_7contextERKNS0_13property_listE +_ZN2cl4sycl20aligned_alloc_deviceEmmRKNS0_6deviceERKNS0_7contextERKNS0_13property_listENS0_6detail13code_locationE _ZN2cl4sycl20aligned_alloc_sharedEmmRKNS0_5queueE +_ZN2cl4sycl20aligned_alloc_sharedEmmRKNS0_5queueENS0_6detail13code_locationE _ZN2cl4sycl20aligned_alloc_sharedEmmRKNS0_5queueERKNS0_13property_listE +_ZN2cl4sycl20aligned_alloc_sharedEmmRKNS0_5queueERKNS0_13property_listENS0_6detail13code_locationE _ZN2cl4sycl20aligned_alloc_sharedEmmRKNS0_6deviceERKNS0_7contextE +_ZN2cl4sycl20aligned_alloc_sharedEmmRKNS0_6deviceERKNS0_7contextENS0_6detail13code_locationE _ZN2cl4sycl20aligned_alloc_sharedEmmRKNS0_6deviceERKNS0_7contextERKNS0_13property_listE +_ZN2cl4sycl20aligned_alloc_sharedEmmRKNS0_6deviceERKNS0_7contextERKNS0_13property_listENS0_6detail13code_locationE _ZN2cl4sycl3ext5intel12experimental15online_compilerILNS3_15source_languageE0EE7compileIJSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaISE_EEEEES8_IhSaIhEERKSE_DpRKT_ _ZN2cl4sycl3ext5intel12experimental15online_compilerILNS3_15source_languageE1EE7compileIJSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaISE_EEEEES8_IhSaIhEERKSE_DpRKT_ _ZN2cl4sycl3ext5intel15online_compilerILNS2_15source_languageE0EE7compileIJSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaISD_EEEEES7_IhSaIhEERKSD_DpRKT_ @@ -3652,7 +3680,9 @@ _ZN2cl4sycl3ext6oneapi6detail16reduGetMaxWGSizeESt10shared_ptrINS0_6detail10queu _ZN2cl4sycl3ext6oneapi6detail17reduComputeWGSizeEmmRm _ZN2cl4sycl3ext6oneapi6detail33reduGetMaxNumConcurrentWorkGroupsESt10shared_ptrINS0_6detail10queue_implEE _ZN2cl4sycl4freeEPvRKNS0_5queueE +_ZN2cl4sycl4freeEPvRKNS0_5queueENS0_6detail13code_locationE _ZN2cl4sycl4freeEPvRKNS0_7contextE +_ZN2cl4sycl4freeEPvRKNS0_7contextENS0_6detail13code_locationE _ZN2cl4sycl5INTEL15online_compilerILNS1_15source_languageE0EE7compileIJSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaISC_EEEEES6_IhSaIhEERKSC_DpRKT_ _ZN2cl4sycl5INTEL15online_compilerILNS1_15source_languageE1EE7compileIJSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaISC_EEEEES6_IhSaIhEERKSC_DpRKT_ _ZN2cl4sycl5event13get_wait_listEv @@ -3861,6 +3891,9 @@ _ZN2cl4sycl6detail2pi9assertionEbPKc _ZN2cl4sycl6detail2pi9getPluginILNS0_7backendE1EEERKNS1_6pluginEv _ZN2cl4sycl6detail2pi9getPluginILNS0_7backendE2EEERKNS1_6pluginEv _ZN2cl4sycl6detail2pi9getPluginILNS0_7backendE5EEERKNS1_6pluginEv +_ZN2cl4sycl6detail3usm12alignedAllocEmmRKNS0_7contextERKNS0_6deviceENS0_3usm5allocE +_ZN2cl4sycl6detail3usm16alignedAllocHostEmmRKNS0_7contextENS0_3usm5allocE +_ZN2cl4sycl6detail3usm4freeEPvRKNS0_7contextE _ZN2cl4sycl6detail6OSUtil10getDirNameB5cxx11EPKc _ZN2cl4sycl6detail6OSUtil11alignedFreeEPv _ZN2cl4sycl6detail6OSUtil12alignedAllocEmm @@ -3882,9 +3915,13 @@ _ZN2cl4sycl6kernelC1ESt10shared_ptrINS0_6detail11kernel_implEE _ZN2cl4sycl6kernelC2EP10_cl_kernelRKNS0_7contextE _ZN2cl4sycl6kernelC2ESt10shared_ptrINS0_6detail11kernel_implEE _ZN2cl4sycl6mallocEmRKNS0_5queueENS0_3usm5allocE +_ZN2cl4sycl6mallocEmRKNS0_5queueENS0_3usm5allocENS0_6detail13code_locationE _ZN2cl4sycl6mallocEmRKNS0_5queueENS0_3usm5allocERKNS0_13property_listE +_ZN2cl4sycl6mallocEmRKNS0_5queueENS0_3usm5allocERKNS0_13property_listENS0_6detail13code_locationE _ZN2cl4sycl6mallocEmRKNS0_6deviceERKNS0_7contextENS0_3usm5allocE +_ZN2cl4sycl6mallocEmRKNS0_6deviceERKNS0_7contextENS0_3usm5allocENS0_6detail13code_locationE _ZN2cl4sycl6mallocEmRKNS0_6deviceERKNS0_7contextENS0_3usm5allocERKNS0_13property_listE +_ZN2cl4sycl6mallocEmRKNS0_6deviceERKNS0_7contextENS0_3usm5allocERKNS0_13property_listENS0_6detail13code_locationE _ZN2cl4sycl6opencl10make_queueERKNS0_7contextEm _ZN2cl4sycl6opencl11make_deviceEm _ZN2cl4sycl6opencl12make_contextEm diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 57c29ebfb6d98..ced49006b4f19 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -1022,23 +1022,41 @@ ?addStream@handler@sycl@cl@@AEAAXAEBV?$shared_ptr@Vstream_impl@detail@sycl@cl@@@std@@@Z ?advise_usm@MemoryManager@detail@sycl@cl@@SAXPEBXV?$shared_ptr@Vqueue_impl@detail@sycl@cl@@@std@@_KW4_pi_mem_advice@@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@AEAPEAU_pi_event@@@Z ?alignedAlloc@OSUtil@detail@sycl@cl@@SAPEAX_K0@Z +?alignedAlloc@usm@detail@sycl@cl@@YAPEAX_K0AEBVcontext@34@AEBVdevice@34@W4alloc@134@@Z +?alignedAllocHost@usm@detail@sycl@cl@@YAPEAX_K0AEBVcontext@34@W4alloc@134@@Z ?alignedFree@OSUtil@detail@sycl@cl@@SAXPEAX@Z ?aligned_alloc@sycl@cl@@YAPEAX_K0AEBVdevice@12@AEBVcontext@12@W4alloc@usm@12@@Z ?aligned_alloc@sycl@cl@@YAPEAX_K0AEBVdevice@12@AEBVcontext@12@W4alloc@usm@12@AEBVproperty_list@12@@Z +?aligned_alloc@sycl@cl@@YAPEAX_K0AEBVdevice@12@AEBVcontext@12@W4alloc@usm@12@AEBVproperty_list@12@Ucode_location@detail@12@@Z +?aligned_alloc@sycl@cl@@YAPEAX_K0AEBVdevice@12@AEBVcontext@12@W4alloc@usm@12@Ucode_location@detail@12@@Z ?aligned_alloc@sycl@cl@@YAPEAX_K0AEBVqueue@12@W4alloc@usm@12@@Z ?aligned_alloc@sycl@cl@@YAPEAX_K0AEBVqueue@12@W4alloc@usm@12@AEBVproperty_list@12@@Z +?aligned_alloc@sycl@cl@@YAPEAX_K0AEBVqueue@12@W4alloc@usm@12@AEBVproperty_list@12@Ucode_location@detail@12@@Z +?aligned_alloc@sycl@cl@@YAPEAX_K0AEBVqueue@12@W4alloc@usm@12@Ucode_location@detail@12@@Z ?aligned_alloc_device@sycl@cl@@YAPEAX_K0AEBVdevice@12@AEBVcontext@12@@Z ?aligned_alloc_device@sycl@cl@@YAPEAX_K0AEBVdevice@12@AEBVcontext@12@AEBVproperty_list@12@@Z +?aligned_alloc_device@sycl@cl@@YAPEAX_K0AEBVdevice@12@AEBVcontext@12@AEBVproperty_list@12@Ucode_location@detail@12@@Z +?aligned_alloc_device@sycl@cl@@YAPEAX_K0AEBVdevice@12@AEBVcontext@12@Ucode_location@detail@12@@Z ?aligned_alloc_device@sycl@cl@@YAPEAX_K0AEBVqueue@12@@Z ?aligned_alloc_device@sycl@cl@@YAPEAX_K0AEBVqueue@12@AEBVproperty_list@12@@Z +?aligned_alloc_device@sycl@cl@@YAPEAX_K0AEBVqueue@12@AEBVproperty_list@12@Ucode_location@detail@12@@Z +?aligned_alloc_device@sycl@cl@@YAPEAX_K0AEBVqueue@12@Ucode_location@detail@12@@Z ?aligned_alloc_host@sycl@cl@@YAPEAX_K0AEBVcontext@12@@Z ?aligned_alloc_host@sycl@cl@@YAPEAX_K0AEBVcontext@12@AEBVproperty_list@12@@Z +?aligned_alloc_host@sycl@cl@@YAPEAX_K0AEBVcontext@12@AEBVproperty_list@12@Ucode_location@detail@12@@Z +?aligned_alloc_host@sycl@cl@@YAPEAX_K0AEBVcontext@12@Ucode_location@detail@12@@Z ?aligned_alloc_host@sycl@cl@@YAPEAX_K0AEBVqueue@12@@Z ?aligned_alloc_host@sycl@cl@@YAPEAX_K0AEBVqueue@12@AEBVproperty_list@12@@Z +?aligned_alloc_host@sycl@cl@@YAPEAX_K0AEBVqueue@12@AEBVproperty_list@12@Ucode_location@detail@12@@Z +?aligned_alloc_host@sycl@cl@@YAPEAX_K0AEBVqueue@12@Ucode_location@detail@12@@Z ?aligned_alloc_shared@sycl@cl@@YAPEAX_K0AEBVdevice@12@AEBVcontext@12@@Z ?aligned_alloc_shared@sycl@cl@@YAPEAX_K0AEBVdevice@12@AEBVcontext@12@AEBVproperty_list@12@@Z +?aligned_alloc_shared@sycl@cl@@YAPEAX_K0AEBVdevice@12@AEBVcontext@12@AEBVproperty_list@12@Ucode_location@detail@12@@Z +?aligned_alloc_shared@sycl@cl@@YAPEAX_K0AEBVdevice@12@AEBVcontext@12@Ucode_location@detail@12@@Z ?aligned_alloc_shared@sycl@cl@@YAPEAX_K0AEBVqueue@12@@Z ?aligned_alloc_shared@sycl@cl@@YAPEAX_K0AEBVqueue@12@AEBVproperty_list@12@@Z +?aligned_alloc_shared@sycl@cl@@YAPEAX_K0AEBVqueue@12@AEBVproperty_list@12@Ucode_location@detail@12@@Z +?aligned_alloc_shared@sycl@cl@@YAPEAX_K0AEBVqueue@12@Ucode_location@detail@12@@Z ?allocate@MemoryManager@detail@sycl@cl@@SAPEAXV?$shared_ptr@Vcontext_impl@detail@sycl@cl@@@std@@PEAVSYCLMemObjI@234@_NPEAXV?$vector@V?$shared_ptr@Vevent_impl@detail@sycl@cl@@@std@@V?$allocator@V?$shared_ptr@Vevent_impl@detail@sycl@cl@@@std@@@2@@6@AEAPEAU_pi_event@@@Z ?allocateBufferObject@MemoryManager@detail@sycl@cl@@SAPEAXV?$shared_ptr@Vcontext_impl@detail@sycl@cl@@@std@@PEAX_N_KAEBVproperty_list@34@@Z ?allocateHostMem@SYCLMemObjT@detail@sycl@cl@@UEAAPEAXXZ @@ -2003,7 +2021,10 @@ ?fract@__host_std@cl@@YAMMPEAM@Z ?fract@__host_std@cl@@YANNPEAN@Z ?free@sycl@cl@@YAXPEAXAEBVcontext@12@@Z +?free@sycl@cl@@YAXPEAXAEBVcontext@12@Ucode_location@detail@12@@Z ?free@sycl@cl@@YAXPEAXAEBVqueue@12@@Z +?free@sycl@cl@@YAXPEAXAEBVqueue@12@Ucode_location@detail@12@@Z +?free@usm@detail@sycl@cl@@YAXPEAXAEBVcontext@34@@Z ?frexp@__host_std@cl@@YA?AV?$vec@M$00@sycl@2@V342@PEAV?$vec@H$00@42@@Z ?frexp@__host_std@cl@@YA?AV?$vec@M$01@sycl@2@V342@PEAV?$vec@H$01@42@@Z ?frexp@__host_std@cl@@YA?AV?$vec@M$02@sycl@2@V342@PEAV?$vec@H$02@42@@Z @@ -2592,20 +2613,36 @@ ?make_queue@opencl@sycl@cl@@YA?AVqueue@23@AEBVcontext@23@_K@Z ?malloc@sycl@cl@@YAPEAX_KAEBVdevice@12@AEBVcontext@12@W4alloc@usm@12@@Z ?malloc@sycl@cl@@YAPEAX_KAEBVdevice@12@AEBVcontext@12@W4alloc@usm@12@AEBVproperty_list@12@@Z +?malloc@sycl@cl@@YAPEAX_KAEBVdevice@12@AEBVcontext@12@W4alloc@usm@12@AEBVproperty_list@12@Ucode_location@detail@12@@Z +?malloc@sycl@cl@@YAPEAX_KAEBVdevice@12@AEBVcontext@12@W4alloc@usm@12@Ucode_location@detail@12@@Z ?malloc@sycl@cl@@YAPEAX_KAEBVqueue@12@W4alloc@usm@12@@Z ?malloc@sycl@cl@@YAPEAX_KAEBVqueue@12@W4alloc@usm@12@AEBVproperty_list@12@@Z +?malloc@sycl@cl@@YAPEAX_KAEBVqueue@12@W4alloc@usm@12@AEBVproperty_list@12@Ucode_location@detail@12@@Z +?malloc@sycl@cl@@YAPEAX_KAEBVqueue@12@W4alloc@usm@12@Ucode_location@detail@12@@Z ?malloc_device@sycl@cl@@YAPEAX_KAEBVdevice@12@AEBVcontext@12@@Z ?malloc_device@sycl@cl@@YAPEAX_KAEBVdevice@12@AEBVcontext@12@AEBVproperty_list@12@@Z +?malloc_device@sycl@cl@@YAPEAX_KAEBVdevice@12@AEBVcontext@12@AEBVproperty_list@12@Ucode_location@detail@12@@Z +?malloc_device@sycl@cl@@YAPEAX_KAEBVdevice@12@AEBVcontext@12@Ucode_location@detail@12@@Z ?malloc_device@sycl@cl@@YAPEAX_KAEBVqueue@12@@Z ?malloc_device@sycl@cl@@YAPEAX_KAEBVqueue@12@AEBVproperty_list@12@@Z +?malloc_device@sycl@cl@@YAPEAX_KAEBVqueue@12@AEBVproperty_list@12@Ucode_location@detail@12@@Z +?malloc_device@sycl@cl@@YAPEAX_KAEBVqueue@12@Ucode_location@detail@12@@Z ?malloc_host@sycl@cl@@YAPEAX_KAEBVcontext@12@@Z ?malloc_host@sycl@cl@@YAPEAX_KAEBVcontext@12@AEBVproperty_list@12@@Z +?malloc_host@sycl@cl@@YAPEAX_KAEBVcontext@12@AEBVproperty_list@12@Ucode_location@detail@12@@Z +?malloc_host@sycl@cl@@YAPEAX_KAEBVcontext@12@Ucode_location@detail@12@@Z ?malloc_host@sycl@cl@@YAPEAX_KAEBVqueue@12@@Z ?malloc_host@sycl@cl@@YAPEAX_KAEBVqueue@12@AEBVproperty_list@12@@Z +?malloc_host@sycl@cl@@YAPEAX_KAEBVqueue@12@AEBVproperty_list@12@Ucode_location@detail@12@@Z +?malloc_host@sycl@cl@@YAPEAX_KAEBVqueue@12@Ucode_location@detail@12@@Z ?malloc_shared@sycl@cl@@YAPEAX_KAEBVdevice@12@AEBVcontext@12@@Z ?malloc_shared@sycl@cl@@YAPEAX_KAEBVdevice@12@AEBVcontext@12@AEBVproperty_list@12@@Z +?malloc_shared@sycl@cl@@YAPEAX_KAEBVdevice@12@AEBVcontext@12@AEBVproperty_list@12@Ucode_location@detail@12@@Z +?malloc_shared@sycl@cl@@YAPEAX_KAEBVdevice@12@AEBVcontext@12@Ucode_location@detail@12@@Z ?malloc_shared@sycl@cl@@YAPEAX_KAEBVqueue@12@@Z ?malloc_shared@sycl@cl@@YAPEAX_KAEBVqueue@12@AEBVproperty_list@12@@Z +?malloc_shared@sycl@cl@@YAPEAX_KAEBVqueue@12@AEBVproperty_list@12@Ucode_location@detail@12@@Z +?malloc_shared@sycl@cl@@YAPEAX_KAEBVqueue@12@Ucode_location@detail@12@@Z ?map@MemoryManager@detail@sycl@cl@@SAPEAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@sycl@cl@@@std@@W4mode@access@34@IV?$range@$02@34@4V?$id@$02@34@IV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@@Z ?maxmag@__host_std@cl@@YA?AV?$vec@M$00@sycl@2@V342@0@Z ?maxmag@__host_std@cl@@YA?AV?$vec@M$01@sycl@2@V342@0@Z diff --git a/sycl/tools/CMakeLists.txt b/sycl/tools/CMakeLists.txt index a4f5674826721..773240d8a723f 100644 --- a/sycl/tools/CMakeLists.txt +++ b/sycl/tools/CMakeLists.txt @@ -2,6 +2,7 @@ add_subdirectory(sycl-ls) if (SYCL_ENABLE_XPTI_TRACING) add_subdirectory(pi-trace) + add_subdirectory(sycl-sanitizer) endif() # TODO: move each tool in its own sub-directory diff --git a/sycl/tools/sycl-sanitizer/CMakeLists.txt b/sycl/tools/sycl-sanitizer/CMakeLists.txt new file mode 100644 index 0000000000000..de566436071ae --- /dev/null +++ b/sycl/tools/sycl-sanitizer/CMakeLists.txt @@ -0,0 +1,24 @@ +add_library(sycl_sanitizer_collector SHARED collector.cpp) +target_compile_definitions(sycl_sanitizer_collector PRIVATE XPTI_CALLBACK_API_EXPORTS) +target_link_libraries(sycl_sanitizer_collector PRIVATE xptifw) +if (TARGET OpenCL-Headers) + target_link_libraries(sycl_sanitizer_collector PRIVATE OpenCL-Headers) +endif() + +target_include_directories(sycl_sanitizer_collector PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/../xpti_helpers/" + "${sycl_inc_dir}" + "${sycl_src_dir}" +) + +add_executable(sycl-sanitizer tool.cpp) + +add_dependencies(sycl-sanitizer sycl_sanitizer_collector) +add_dependencies(sycl-toolchain sycl-sanitizer) + +include(GNUInstallDirs) +install(TARGETS sycl-sanitizer sycl_sanitizer_collector + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT sycl-sanitizer + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT sycl-sanitizer + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT sycl-sanitizer +) diff --git a/sycl/tools/sycl-sanitizer/collector.cpp b/sycl/tools/sycl-sanitizer/collector.cpp new file mode 100644 index 0000000000000..d0766f2230c6b --- /dev/null +++ b/sycl/tools/sycl-sanitizer/collector.cpp @@ -0,0 +1,216 @@ +//==-------------- collector.cpp -------------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +/// \file collector.cpp +/// The SYCL sanitizer collector intercepts PI calls to find memory leaks in +/// usages of USM pointers. + +#include "xpti/xpti_trace_framework.h" + +#include "pi_arguments_handler.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +struct TracepointInfo { + std::string Source; + std::string Function; + uint32_t Line; +}; + +enum class AllocKind { host, device, shared }; + +struct AllocationInfo { + size_t Length; + AllocKind Kind; + TracepointInfo Location; +}; + +struct GlobalState { + std::mutex IOMutex; + std::map ActivePointers; + TracepointInfo LastTracepoint; + sycl::xpti_helpers::PiArgumentsHandler ArgHandlerPostCall; + sycl::xpti_helpers::PiArgumentsHandler ArgHandlerPreCall; +}; + +GlobalState *GS = nullptr; + +static void handleUSMHostAlloc(const pi_plugin &, std::optional, + void **ResultPtr, pi_context, + pi_usm_mem_properties *, size_t Size, + pi_uint32) { + AllocationInfo Info; + Info.Location = GS->LastTracepoint; + Info.Length = Size; + Info.Kind = AllocKind::host; + GS->ActivePointers[*ResultPtr] = Info; +} + +static void handleUSMDeviceAlloc(const pi_plugin &, std::optional, + void **ResultPtr, pi_context, pi_device, + pi_usm_mem_properties *, size_t Size, + pi_uint32) { + AllocationInfo Info; + Info.Location = GS->LastTracepoint; + Info.Length = Size; + Info.Kind = AllocKind::device; + GS->ActivePointers[*ResultPtr] = Info; +} + +static void handleUSMSharedAlloc(const pi_plugin &, std::optional, + void **ResultPtr, pi_context, pi_device, + pi_usm_mem_properties *, size_t Size, + pi_uint32) { + AllocationInfo Info; + Info.Location = GS->LastTracepoint; + Info.Length = Size; + Info.Kind = AllocKind::shared; + GS->ActivePointers[*ResultPtr] = Info; +} + +static void handleUSMFree(const pi_plugin &, std::optional, + pi_context, void *Ptr) { + if (GS->ActivePointers.count(Ptr) == 0) { + std::cerr << "Attempt to free pointer " << std::hex << Ptr; + std::cerr << " that was not allocated with SYCL USM APIs.\n"; + std::cerr << " Location: function " << GS->LastTracepoint.Function; + std::cerr << " at " << GS->LastTracepoint.Source << ":"; + std::cerr << std::dec << GS->LastTracepoint.Line << "\n"; + std::terminate(); + } + GS->ActivePointers.erase(Ptr); +} + +static void handleMemBufferCreate(const pi_plugin &, std::optional, + pi_context, pi_mem_flags, size_t Size, + void *HostPtr, pi_mem *, + const pi_mem_properties *) { + for (const auto &Alloc : GS->ActivePointers) { + const void *Begin = Alloc.first; + const void *End = + static_cast(Alloc.first) + Alloc.second.Length; + // Host pointer was allocated with USM APIs + if (HostPtr >= Begin && HostPtr <= End) { + bool NeedsTerminate = false; + if (Alloc.second.Kind != AllocKind::host) { + std::cerr << "Attempt to construct a buffer with non-host pointer.\n"; + NeedsTerminate = true; + } + + const void *HostEnd = static_cast(HostPtr) + Size; + if (HostEnd > End) { + std::cerr << "Buffer size exceeds allocated host memory size.\n"; + NeedsTerminate = true; + } + + if (NeedsTerminate) { + std::cerr << " Allocation location: "; + std::cerr << " function " << Alloc.second.Location.Function << " at "; + std::cerr << Alloc.second.Location.Source << ":" + << Alloc.second.Location.Line << "\n"; + std::cerr << " Buffer location: "; + std::cerr << " function " << GS->LastTracepoint.Function << " at "; + std::cerr << GS->LastTracepoint.Source << ":" << GS->LastTracepoint.Line + << "\n"; + std::terminate(); + } + break; + } + } +} + +XPTI_CALLBACK_API void tpCallback(uint16_t trace_type, + xpti::trace_event_data_t *parent, + xpti::trace_event_data_t *event, + uint64_t instance, const void *user_data); + +XPTI_CALLBACK_API void xptiTraceInit(unsigned int /*major_version*/, + unsigned int /*minor_version*/, + const char * /*version_str*/, + const char *StreamName) { + if (std::string_view(StreamName) == "sycl.pi.debug") { + GS = new GlobalState; + uint8_t StreamID = xptiRegisterStream(StreamName); + xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::function_with_args_begin, + tpCallback); + xptiRegisterCallback( + StreamID, (uint16_t)xpti::trace_point_type_t::function_with_args_end, + tpCallback); + + GS->ArgHandlerPostCall.set_piextUSMHostAlloc(handleUSMHostAlloc); + GS->ArgHandlerPostCall.set_piextUSMDeviceAlloc(handleUSMDeviceAlloc); + GS->ArgHandlerPostCall.set_piextUSMSharedAlloc(handleUSMSharedAlloc); + GS->ArgHandlerPreCall.set_piextUSMFree(handleUSMFree); + GS->ArgHandlerPreCall.set_piMemBufferCreate(handleMemBufferCreate); + } +} + +XPTI_CALLBACK_API void xptiTraceFinish(const char *StreamName) { + if (std::string_view(StreamName) == "sycl.pi.debug") { + bool hadLeak = false; + if (GS->ActivePointers.size() > 0) { + hadLeak = true; + std::cerr << "Found " << GS->ActivePointers.size() + << " leaked memory allocations\n"; + for (const auto &Ptr : GS->ActivePointers) { + std::cerr << "Leaked pointer: " << std::hex << Ptr.first << "\n"; + std::cerr << " Location: " + << "function " << Ptr.second.Location.Function << " at " + << Ptr.second.Location.Source << ":" << std::dec + << Ptr.second.Location.Line << "\n"; + } + } + + delete GS; + if (hadLeak) + exit(-1); + } +} + +XPTI_CALLBACK_API void tpCallback(uint16_t TraceType, + xpti::trace_event_data_t *Parent, + xpti::trace_event_data_t *Event, + uint64_t /*Instance*/, const void *UserData) { + auto *Payload = xptiQueryPayloadByUID(xptiGetUniversalId()); + + if (Payload) { + if (Payload->source_file) + GS->LastTracepoint.Source = Payload->source_file; + else + GS->LastTracepoint.Source = ""; + GS->LastTracepoint.Function = Payload->name; + GS->LastTracepoint.Line = Payload->line_no; + } else { + GS->LastTracepoint.Function = ""; + GS->LastTracepoint.Source = ""; + GS->LastTracepoint.Line = 0; + } + + auto Type = static_cast(TraceType); + // Lock while we capture information + std::lock_guard Lock(GS->IOMutex); + + const auto *Data = static_cast(UserData); + const auto *Plugin = static_cast(Data->user_data); + if (Type == xpti::trace_point_type_t::function_with_args_begin) { + GS->ArgHandlerPreCall.handle(Data->function_id, *Plugin, std::nullopt, + Data->args_data); + } else if (Type == xpti::trace_point_type_t::function_with_args_end) { + const pi_result Result = *static_cast(Data->ret_data); + GS->ArgHandlerPostCall.handle(Data->function_id, *Plugin, Result, + Data->args_data); + } +} diff --git a/sycl/tools/sycl-sanitizer/tool.cpp b/sycl/tools/sycl-sanitizer/tool.cpp new file mode 100644 index 0000000000000..bbe9fedb860b8 --- /dev/null +++ b/sycl/tools/sycl-sanitizer/tool.cpp @@ -0,0 +1,80 @@ +//==----------------- tool.cpp ---------------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +#ifdef _WIN32 +#include +#else +#include +#endif + +void showHelp() { + std::cout << "Sample usage: sycl-sanitizer application.exe --arg1 --arg2\n"; +} + +int launch(const char *Cmd, const std::vector &Args, + const std::vector &Env) { +#ifdef _WIN32 + _spawnve(_P_WAIT, Cmd, const_cast(Args.data()), + const_cast(Env.data())); + return 0; +#else + return execve(Cmd, const_cast(Args.data()), + const_cast(Env.data())); +#endif +} + +int main(int argc, char *argv[], char *env[]) { + if (argc < 2) { + showHelp(); + return 0; + } + + if (std::string_view(argv[1]) == "--help") { + showHelp(); + return 0; + } + + std::vector NewEnv; + + { + size_t I = 0; + while (env[I] != nullptr) + NewEnv.push_back(env[I++]); + } + +#ifdef _WIN32 + NewEnv.push_back("XPTI_FRAMEWORK_DISPATCHER=xptifw.dll"); + NewEnv.push_back("XPTI_SUBSCRIBERS=sycl_sanitizer_collector.dll"); +#else + NewEnv.push_back("XPTI_FRAMEWORK_DISPATCHER=libxptifw.so"); + NewEnv.push_back("XPTI_SUBSCRIBERS=libsycl_sanitizer_collector.so"); +#endif + NewEnv.push_back("XPTI_TRACE_ENABLE=1"); + NewEnv.push_back(nullptr); + + std::vector Args; + + for (size_t I = 1; I < static_cast(argc); I++) + Args.push_back(argv[I]); + + Args.push_back(nullptr); + + int Err = launch(argv[1], Args, NewEnv); + + if (Err) { + std::cerr << "Failed to launch target application. Error code " << Err + << "\n"; + return Err; + } + + return 0; +} diff --git a/xpti/include/xpti/xpti_trace_framework.h b/xpti/include/xpti/xpti_trace_framework.h index 82465daf37e03..c5c93618192f2 100644 --- a/xpti/include/xpti/xpti_trace_framework.h +++ b/xpti/include/xpti/xpti_trace_framework.h @@ -71,6 +71,21 @@ XPTI_EXPORT_API void xptiFinalize(const char *stream); /// sent as the instance ID for that task. XPTI_EXPORT_API uint64_t xptiGetUniqueId(); +/// @brief Returns universal ID +/// @details Universal ID is a 64 bit value, that can be used to correlate +/// events from different software layers. It is generated once for top SW layer +/// and then re-used by subsequent layers to identify original source code +/// location. This value is stored in thread-local storage. +XPTI_EXPORT_API uint64_t xptiGetUniversalId(); + +/// @brief Update universal ID value +/// @detail Save new universal ID value to thread-local storage. This function +/// is typically called by xpti::framework::tracepoint_t constructor when +/// updating tracepoint information. See xptiGetUniversalId() for more info +/// about universal IDs. +/// @param uid Unique 64 bit identifier. +XPTI_EXPORT_API void xptiSetUniversalId(uint64_t uid); + /// @brief Register a string to the string table /// @details All strings in the XPTI framework are referred to by their string /// IDs and this method allow you to register a string and get the string ID for @@ -418,6 +433,8 @@ typedef xpti::result_t (*xpti_initialize_t)(const char *, uint32_t, uint32_t, const char *); typedef void (*xpti_finalize_t)(const char *); typedef uint64_t (*xpti_get_unique_id_t)(); +typedef uint64_t (*xpti_get_universal_id_t)(); +typedef void (*xpti_set_universal_id_t)(uint64_t uid); typedef xpti::string_id_t (*xpti_register_string_t)(const char *, char **); typedef const char *(*xpti_lookup_string_t)(xpti::string_id_t); typedef uint64_t (*xpti_register_payload_t)(xpti::payload_t *); diff --git a/xpti/include/xpti/xpti_trace_framework.hpp b/xpti/include/xpti/xpti_trace_framework.hpp index 6e464d32290c9..ac6ef95c6be01 100644 --- a/xpti/include/xpti/xpti_trace_framework.hpp +++ b/xpti/include/xpti/xpti_trace_framework.hpp @@ -8,6 +8,7 @@ #pragma once #include +#include #include #include #include @@ -276,7 +277,6 @@ class PlatformHelper { } // namespace utils namespace framework { -static thread_local uint64_t g_tls_uid = xpti::invalid_uid; constexpr uint16_t signal = (uint16_t)xpti::trace_point_type_t::signal; constexpr uint16_t graph_create = (uint16_t)xpti::trace_point_type_t::graph_create; @@ -397,7 +397,7 @@ class tracepoint_t { if (p) { // We expect the payload input has been populated with the information // available at that time - uint64_t uid = g_tls_uid; + uint64_t uid = xptiGetUniversalId(); if (uid != xpti::invalid_uid) { // We already have a parent SW layer that has a tracepoint defined m_payload = xptiQueryPayloadByUID(uid); @@ -405,7 +405,7 @@ class tracepoint_t { m_top = true; uid = xptiRegisterPayload(p); if (uid != xpti::invalid_uid) { - g_tls_uid = uid; + xptiSetUniversalId(uid); m_payload = xptiQueryPayloadByUID(uid); } } @@ -413,7 +413,7 @@ class tracepoint_t { } ~tracepoint_t() { if (m_top) { - g_tls_uid = xpti::invalid_uid; + ::xptiSetUniversalId(xpti::invalid_uid); } } diff --git a/xpti/src/xpti_proxy.cpp b/xpti/src/xpti_proxy.cpp index 8e1439cd24bd5..44989604ccd81 100644 --- a/xpti/src/xpti_proxy.cpp +++ b/xpti/src/xpti_proxy.cpp @@ -16,6 +16,8 @@ enum functions_t { XPTI_INITIALIZE, XPTI_FINALIZE, XPTI_GET_UNIQUE_ID, + XPTI_GET_UNIVERSAL_ID, + XPTI_SET_UNIVERSAL_ID, XPTI_REGISTER_STRING, XPTI_LOOKUP_STRING, XPTI_REGISTER_STREAM, @@ -45,6 +47,8 @@ class ProxyLoader { {XPTI_INITIALIZE, "xptiInitialize"}, {XPTI_FINALIZE, "xptiFinalize"}, {XPTI_GET_UNIQUE_ID, "xptiGetUniqueId"}, + {XPTI_GET_UNIVERSAL_ID, "xptiGetUniversalId"}, + {XPTI_SET_UNIVERSAL_ID, "xptiSetUniversalId"}, {XPTI_REGISTER_STRING, "xptiRegisterString"}, {XPTI_LOOKUP_STRING, "xptiLookupString"}, {XPTI_REGISTER_PAYLOAD, "xptiRegisterPayload"}, @@ -186,6 +190,27 @@ XPTI_EXPORT_API uint64_t xptiGetUniqueId() { return xpti::invalid_id; } +XPTI_EXPORT_API uint64_t xptiGetUniversalId() { + if (xpti::ProxyLoader::instance().noErrors()) { + auto f = + xpti::ProxyLoader::instance().functionByIndex(XPTI_GET_UNIVERSAL_ID); + if (f) { + return (*reinterpret_cast(f))(); + } + } + return xpti::invalid_id; +} + +XPTI_EXPORT_API void xptiSetUniversalId(uint64_t uid) { + if (xpti::ProxyLoader::instance().noErrors()) { + auto f = + xpti::ProxyLoader::instance().functionByIndex(XPTI_SET_UNIVERSAL_ID); + if (f) { + return (*reinterpret_cast(f))(uid); + } + } +} + XPTI_EXPORT_API xpti::string_id_t xptiRegisterString(const char *string, char **table_string) { if (xpti::g_loader.noErrors()) { diff --git a/xptifw/src/xpti_trace_framework.cpp b/xptifw/src/xpti_trace_framework.cpp index 095bc2b09b494..5d46f63204c7f 100644 --- a/xptifw/src/xpti_trace_framework.cpp +++ b/xptifw/src/xpti_trace_framework.cpp @@ -39,6 +39,10 @@ namespace xpti { constexpr const char *env_subscribers = "XPTI_SUBSCRIBERS"; xpti::utils::PlatformHelper g_helper; +xpti::utils::SpinLock g_framework_mutex; + +static thread_local uint64_t g_tls_uid = xpti::invalid_uid; + // This class is a helper class to load all the listed subscribers provided by // the user in XPTI_SUBSCRIBERS environment variable. class Subscribers { @@ -818,6 +822,10 @@ class Framework { inline uint64_t makeUniqueID() { return MTracepoints.makeUniqueID(); } + uint64_t getUniversalID() const noexcept { return g_tls_uid; } + + void setUniversalID(uint64_t uid) noexcept { g_tls_uid = uid; } + xpti::result_t addMetadata(xpti::trace_event_data_t *Event, const char *Key, const char *Value) { return MTracepoints.addMetadata(Event, Key, Value); @@ -1025,6 +1033,14 @@ XPTI_EXPORT_API uint64_t xptiGetUniqueId() { return xpti::GXPTIFramework.makeUniqueID(); } +XPTI_EXPORT_API uint64_t xptiGetUniversalId() { + return xpti::Framework::instance().getUniversalID(); +} + +XPTI_EXPORT_API void xptiSetUniversalId(uint64_t uid) { + xpti::Framework::instance().setUniversalID(uid); +} + XPTI_EXPORT_API xpti::string_id_t xptiRegisterString(const char *String, char **RefTableStr) { return xpti::GXPTIFramework.registerString(String, RefTableStr);