diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index 49e569077f9fd..1f981bdd66dc6 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -1298,7 +1298,8 @@ using pi_usm_migration_flags = _pi_usm_migration_flags; /// \param context is the pi_context /// \param pi_usm_mem_properties are optional allocation properties /// \param size_t is the size of the allocation -/// \param alignment is the desired alignment of the allocation +/// \param alignment is the desired alignment of the allocation. 0 indicates no +/// requirements, and uses the backend default alignment. pi_result piextUSMHostAlloc(void **result_ptr, pi_context context, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment); @@ -1310,7 +1311,8 @@ pi_result piextUSMHostAlloc(void **result_ptr, pi_context context, /// \param device is the device the memory will be allocated on /// \param pi_usm_mem_properties are optional allocation properties /// \param size_t is the size of the allocation -/// \param alignment is the desired alignment of the allocation +/// \param alignment is the desired alignment of the allocation. 0 indicates no +/// requirements, and uses the backend default alignment. pi_result piextUSMDeviceAlloc(void **result_ptr, pi_context context, pi_device device, pi_usm_mem_properties *properties, size_t size, @@ -1323,7 +1325,8 @@ pi_result piextUSMDeviceAlloc(void **result_ptr, pi_context context, /// \param device is the device the memory will be allocated on /// \param pi_usm_mem_properties are optional allocation properties /// \param size_t is the size of the allocation -/// \param alignment is the desired alignment of the allocation +/// \param alignment is the desired alignment of the allocation. 0 indicates no +/// requirements, and uses the backend default alignment. 
pi_result piextUSMSharedAlloc(void **result_ptr, pi_context context, pi_device device, pi_usm_mem_properties *properties, size_t size, @@ -1340,8 +1343,7 @@ pi_result piextUSMFree(pi_context context, void *ptr); /// \param queue is the queue to submit to /// \param ptr is the ptr to memset /// \param value is value to set. It is interpreted as an 8-bit value and the -/// upper -/// 24 bits are ignored +/// upper 24 bits are ignored /// \param count is the size in bytes to memset /// \param num_events_in_waitlist is the number of events to wait on /// \param events_waitlist is an array of events to wait on diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index af15743438da8..dad8abdb28fb1 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -3396,9 +3396,19 @@ pi_result cuda_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, pi_result cuda_piextUSMHostAlloc(void **result_ptr, pi_context context, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment) { + // from empirical testing with CUDA 10.2 on a Tesla K40 + static constexpr pi_uint32 max_alignment = 0x200; + + // enforce a valid pointer to the allocated memory assert(result_ptr != nullptr); + // check that the context is valid assert(context != nullptr); + // check that the property list is empty assert(properties == nullptr); + // check that the alignment is not larger than max_alignment, and is either 0 + // or a power of 2 + assert(alignment <= max_alignment && (alignment & (alignment - 1)) == 0); + pi_result result = PI_SUCCESS; try { ScopedContext active(context); @@ -3406,7 +3416,9 @@ pi_result cuda_piextUSMHostAlloc(void **result_ptr, pi_context context, } catch (pi_result error) { result = error; } - assert(reinterpret_cast(*result_ptr) % alignment == 0); + // check that the result is suitably aligned + assert((alignment == 0) || + (reinterpret_cast(*result_ptr) % alignment == 0)); return result; } @@ -3416,10 +3428,21 @@ pi_result 
cuda_piextUSMDeviceAlloc(void **result_ptr, pi_context context, pi_device device, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment) { + // from empirical testing with CUDA 10.2 on a Tesla K40 + static constexpr pi_uint32 max_alignment = 0x200; + + // enforce a valid pointer to the allocated memory assert(result_ptr != nullptr); + // check that the context is valid assert(context != nullptr); + // check that the device is valid assert(device != nullptr); + // check that the property list is empty assert(properties == nullptr); + // check that the alignment is not larger than max_alignment, and is either 0 + // or a power of 2 + assert(alignment <= max_alignment && (alignment & (alignment - 1)) == 0); + pi_result result = PI_SUCCESS; try { ScopedContext active(context); @@ -3427,7 +3450,9 @@ pi_result cuda_piextUSMDeviceAlloc(void **result_ptr, pi_context context, } catch (pi_result error) { result = error; } - assert(reinterpret_cast(*result_ptr) % alignment == 0); + // check that the result is suitably aligned + assert((alignment == 0) || + (reinterpret_cast(*result_ptr) % alignment == 0)); return result; } @@ -3437,10 +3462,21 @@ pi_result cuda_piextUSMSharedAlloc(void **result_ptr, pi_context context, pi_device device, pi_usm_mem_properties *properties, size_t size, pi_uint32 alignment) { + // from empirical testing with CUDA 10.2 on a Tesla K40 + static constexpr pi_uint32 max_alignment = 0x200; + + // enforce a valid pointer to the allocated memory assert(result_ptr != nullptr); + // check that the context is valid assert(context != nullptr); + // check that the device is valid assert(device != nullptr); + // check that the property list is empty assert(properties == nullptr); + // check that the alignment is not larger than max_alignment, and is either 0 + // or a power of 2 + assert(alignment <= max_alignment && (alignment & (alignment - 1)) == 0); + pi_result result = PI_SUCCESS; try { ScopedContext active(context); @@ -3449,7 +3485,9 @@ 
pi_result cuda_piextUSMSharedAlloc(void **result_ptr, pi_context context, } catch (pi_result error) { result = error; } - assert(reinterpret_cast(*result_ptr) % alignment == 0); + // check that the result is suitably aligned + assert((alignment == 0) || + (reinterpret_cast(*result_ptr) % alignment == 0)); return result; } @@ -3481,8 +3519,12 @@ pi_result cuda_piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_int32 value, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event) { + // enforce that the queue is valid assert(queue != nullptr); - assert(ptr != nullptr); + // check that the pointer is valid + if (ptr == nullptr) { + return PI_INVALID_VALUE; + } CUstream cuStream = queue->get(); pi_result result = PI_SUCCESS; std::unique_ptr<_pi_event> event_ptr{nullptr}; @@ -3514,9 +3556,12 @@ pi_result cuda_piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event) { + // enforce that the queue is valid assert(queue != nullptr); - assert(dst_ptr != nullptr); - assert(src_ptr != nullptr); + // check that the source and destination pointers are valid + if (dst_ptr == nullptr || src_ptr == nullptr) { + return PI_INVALID_VALUE; + } CUstream cuStream = queue->get(); pi_result result = PI_SUCCESS; std::unique_ptr<_pi_event> event_ptr{nullptr}; @@ -3553,8 +3598,12 @@ pi_result cuda_piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event) { + // enforce that the queue is valid assert(queue != nullptr); - assert(ptr != nullptr); + // check that the pointer is valid + if (ptr == nullptr) { + return PI_INVALID_VALUE; + } CUstream cuStream = queue->get(); pi_result result = PI_SUCCESS; std::unique_ptr<_pi_event> event_ptr{nullptr}; @@ -3589,8 +3638,12 @@ pi_result cuda_piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, pi_result cuda_piextUSMEnqueueMemAdvise(pi_queue queue, const void 
*ptr, size_t length, int advice, pi_event *event) { + // enforce that the queue is valid assert(queue != nullptr); - assert(ptr != nullptr); + // check that the pointer is valid + if (ptr == nullptr) { + return PI_INVALID_VALUE; + } // TODO implement a mapping to cuMemAdvise once the expected behaviour // of piextUSMEnqueueMemAdvise is detailed in the USM extension return cuda_piEnqueueEventsWait(queue, 0, nullptr, event); diff --git a/sycl/test/usm/allocator_vector.cpp b/sycl/test/usm/allocator_vector.cpp index 2a87695c2f2ff..a2cec79fb793c 100644 --- a/sycl/test/usm/allocator_vector.cpp +++ b/sycl/test/usm/allocator_vector.cpp @@ -1,4 +1,3 @@ -// XFAIL: cuda // piextUSM*Alloc functions for CUDA are not behaving as described in // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc diff --git a/sycl/test/usm/allocator_vector_fail.cpp b/sycl/test/usm/allocator_vector_fail.cpp index 8c0e176eaa411..6a0276d72bf59 100644 --- a/sycl/test/usm/allocator_vector_fail.cpp +++ b/sycl/test/usm/allocator_vector_fail.cpp @@ -1,4 +1,3 @@ -// XFAIL: cuda // piextUSM*Alloc functions for CUDA are not behaving as described in // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc diff --git a/sycl/test/usm/allocatorll.cpp b/sycl/test/usm/allocatorll.cpp index 323dc0d75a1c1..7e633757e8369 100644 --- a/sycl/test/usm/allocatorll.cpp +++ b/sycl/test/usm/allocatorll.cpp @@ -1,4 +1,3 @@ -// XFAIL: cuda // piextUSM*Alloc functions for CUDA are not behaving as described in // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc diff --git a/sycl/test/usm/badmalloc.cpp b/sycl/test/usm/badmalloc.cpp index 
b139d7dbf80d8..ec99a6e475181 100644 --- a/sycl/test/usm/badmalloc.cpp +++ b/sycl/test/usm/badmalloc.cpp @@ -1,5 +1,4 @@ // UNSUPPORTED: windows -// XFAIL: cuda // piextUSM*Alloc functions for CUDA are not behaving as described in // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc diff --git a/sycl/test/usm/depends_on.cpp b/sycl/test/usm/depends_on.cpp index c985fc6c7a9a9..8716db434fed8 100644 --- a/sycl/test/usm/depends_on.cpp +++ b/sycl/test/usm/depends_on.cpp @@ -1,4 +1,3 @@ -// XFAIL: cuda // piextUSM*Alloc functions for CUDA are not behaving as described in // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc diff --git a/sycl/test/usm/dmemll.cpp b/sycl/test/usm/dmemll.cpp index 8617c9f751958..cf367385abd06 100644 --- a/sycl/test/usm/dmemll.cpp +++ b/sycl/test/usm/dmemll.cpp @@ -1,4 +1,3 @@ -// XFAIL: cuda // piextUSM*Alloc functions for CUDA are not behaving as described in // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc diff --git a/sycl/test/usm/hmemll.cpp b/sycl/test/usm/hmemll.cpp index 4ec1fd6a6516a..ac741b71ad372 100644 --- a/sycl/test/usm/hmemll.cpp +++ b/sycl/test/usm/hmemll.cpp @@ -1,4 +1,3 @@ -// XFAIL: cuda // piextUSM*Alloc functions for CUDA are not behaving as described in // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc diff --git a/sycl/test/usm/math.cpp b/sycl/test/usm/math.cpp index 22a8dc2b2e2bd..f0d0ba78f62e7 100644 --- a/sycl/test/usm/math.cpp +++ b/sycl/test/usm/math.cpp @@ -3,7 +3,6 @@ // RUN: %CPU_RUN_PLACEHOLDER 
%t.out // REQUIRES: cpu -// XFAIL: cuda // TODO: ptxas fatal : Unresolved extern function '_Z20__spirv_ocl_lgamma_rfPi' #include diff --git a/sycl/test/usm/memadvise.cpp b/sycl/test/usm/memadvise.cpp index 8183f4a59c784..a50d9e52e0a81 100644 --- a/sycl/test/usm/memadvise.cpp +++ b/sycl/test/usm/memadvise.cpp @@ -1,4 +1,3 @@ -// XFAIL: cuda // SYCL runtime and piextUSM*Alloc functions for CUDA not behaving as described // in: https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc // diff --git a/sycl/test/usm/memcpy.cpp b/sycl/test/usm/memcpy.cpp index 0b933d0f004aa..7643f0f0f3ba3 100644 --- a/sycl/test/usm/memcpy.cpp +++ b/sycl/test/usm/memcpy.cpp @@ -5,7 +5,6 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// XFAIL: cuda // piextUSM*Alloc functions for CUDA are not behaving as described in // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc diff --git a/sycl/test/usm/memset.cpp b/sycl/test/usm/memset.cpp index 313fa4cbda591..80a10b6b84c1f 100644 --- a/sycl/test/usm/memset.cpp +++ b/sycl/test/usm/memset.cpp @@ -1,4 +1,3 @@ -// XFAIL: cuda // piextUSM*Alloc functions for CUDA are not behaving as described in // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc diff --git a/sycl/test/usm/mixed.cpp b/sycl/test/usm/mixed.cpp index 092a1e51d4b4f..ed1e7b6d46013 100644 --- a/sycl/test/usm/mixed.cpp +++ b/sycl/test/usm/mixed.cpp @@ -1,4 +1,3 @@ -// XFAIL: cuda // piextUSM*Alloc functions for CUDA are not behaving as described in // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc // 
https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc diff --git a/sycl/test/usm/mixed2.cpp b/sycl/test/usm/mixed2.cpp index 7e8ef785c42cb..278025a98c78a 100644 --- a/sycl/test/usm/mixed2.cpp +++ b/sycl/test/usm/mixed2.cpp @@ -1,4 +1,3 @@ -// XFAIL: cuda // piextUSM*Alloc functions for CUDA are not behaving as described in // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc diff --git a/sycl/test/usm/mixed2template.cpp b/sycl/test/usm/mixed2template.cpp index 24acd20396e98..24817129d8957 100644 --- a/sycl/test/usm/mixed2template.cpp +++ b/sycl/test/usm/mixed2template.cpp @@ -1,4 +1,3 @@ -// XFAIL: cuda // piextUSM*Alloc functions for CUDA are not behaving as described in // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc diff --git a/sycl/test/usm/mixed_queue.cpp b/sycl/test/usm/mixed_queue.cpp index 1c99ebda7b5ce..0a1fe439ff2da 100644 --- a/sycl/test/usm/mixed_queue.cpp +++ b/sycl/test/usm/mixed_queue.cpp @@ -1,4 +1,3 @@ -// XFAIL: cuda // piextUSM*Alloc functions for CUDA are not behaving as described in // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc diff --git a/sycl/test/usm/queue_wait.cpp b/sycl/test/usm/queue_wait.cpp index dfb2d9414fc21..6afc584b844ae 100644 --- a/sycl/test/usm/queue_wait.cpp +++ b/sycl/test/usm/queue_wait.cpp @@ -1,4 +1,3 @@ -// XFAIL: cuda // piextUSM*Alloc functions for CUDA are not behaving as described in // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc diff --git 
a/sycl/test/usm/smemll.cpp b/sycl/test/usm/smemll.cpp index 46d1f10a5f33f..f919f136fd33d 100644 --- a/sycl/test/usm/smemll.cpp +++ b/sycl/test/usm/smemll.cpp @@ -1,4 +1,3 @@ -// XFAIL: cuda // piextUSM*Alloc functions for CUDA are not behaving as described in // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/USM.adoc // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc