Skip to content

Implementation of DPCTLQueue_Fill8/16/32/64/128 function to SyclInterface library #831

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
May 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/conda-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ jobs:
source $CONDA/etc/profile.d/conda.sh
conda activate
CHANNELS="-c $GITHUB_WORKSPACE/channel -c dppy/label/dev -c intel --override-channels"
conda install -n examples -y $CHANNELS numba-dppy numpy dpctl || exit 1
conda install -n examples -y $CHANNELS numpy dpctl dpnp || exit 1
- name: Build and run examples with native extensions
shell: bash -l {0}
run: |
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/generate-coverage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,9 @@ jobs:
fh3 = open('combined-dpctl-c-api-coverage.json', 'w'); \
json.dump(f3, fh3); fh3.close()" || exit 1
# merge combined file with coverage data and upload
ls -lh dpctl-c-api-coverage.json pytest-dpctl-c-api-coverage.json \
combined-dpctl-c-api-coverage.json \
$(find _skbuild -name dpctl.lcov) $(find . -name dpctl_pytest.lcov)
coveralls --service=github --merge=combined-dpctl-c-api-coverage.json
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
Expand Down
5 changes: 5 additions & 0 deletions dpctl/tests/test_sycl_kernel_submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,11 @@ def test_async_submit():
" global unsigned int *arg2)"
"{"
" size_t index = get_global_id(0);"
" size_t i = 0; "
" size_t unused_sum = 0;"
" for (i = 0; i < 4000; i++) { "
" unused_sum += i;"
" } "
" res[index] = "
" (arg1[index] < arg2[index]) ? arg1[index] : arg2[index];"
"}"
Expand Down
61 changes: 31 additions & 30 deletions examples/python/dppy_kernel.py → examples/python/sycl_timer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,49 +15,50 @@
# limitations under the License.


import numba_dppy
import dpnp
import numpy as np

import dpctl
import dpctl.tensor as dpt
from dpctl import SyclTimer


@numba_dppy.kernel
def dppy_gemm(a, b, c):
i = numba_dppy.get_global_id(0)
j = numba_dppy.get_global_id(1)
if i >= c.shape[0] or j >= c.shape[1]:
return
c[i, j] = 0
for k in range(c.shape[0]):
c[i, j] += a[i, k] * b[k, j]


X = 1024
Y = 16
global_size = X, X

griddim = X, X
blockdim = Y, Y

a = np.arange(X * X, dtype=np.float32).reshape(X, X)
b = np.array(np.random.random(X * X), dtype=np.float32).reshape(X, X)
c = np.ones_like(a).reshape(X, X)
n = 4000

try:
q = dpctl.SyclQueue("opencl:gpu", property="enable_profiling")
q = dpctl.SyclQueue(property="enable_profiling")
except dpctl.SyclQueueCreationError:
print(
"Skipping the example, as dpctl.SyclQueue targeting "
"opencl:gpu device could not be created"
"default device could not be created"
)
exit(0)

a = dpt.reshape(dpt.arange(n * n, dtype=np.float32, sycl_queue=q), (n, n))
b = dpt.reshape(
dpt.asarray(np.random.random(n * n), dtype=np.float32, sycl_queue=q), (n, n)
)

timer = SyclTimer(time_scale=1)
with dpctl.device_context(q):

wall_times = []
device_times = []
print(
f"Performing matrix multiplication of two {n} by {n} matrices "
f"on {q.sycl_device.name}, repeating 5 times."
)
for _ in range(5):
with timer(q):
dppy_gemm[griddim, blockdim](a, b, c)
cc = np.dot(a, b)
a_matmul_b = dpnp.matmul(a, b)
host_time, device_time = timer.dt
wall_times.append(host_time)
device_times.append(device_time)

c = dpnp.asnumpy(a_matmul_b)
cc = np.dot(dpnp.asnumpy(a), dpnp.asnumpy(b))

print("Wall time: ", host_time, "\nDevice time: ", device_time)
print(np.allclose(c, cc))
print("Wall time: ", wall_times, "\nDevice time: ", device_times)
print(
"Accuracy test: passed."
if np.allclose(c, cc)
else (f"Accuracy test: failed. Discrepancy {np.max(np.abs(c-cc))}")
)
1 change: 1 addition & 0 deletions libsyclinterface/cmake/modules/SetupCoverage.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ function(setup_coverage_generation)
"-fprofile-instr-generate "
"-fcoverage-mapping "
"-fno-sycl-use-footer "
# "-save-temps=obj "
"-DDPCTL_COVERAGE "
)

Expand Down
133 changes: 114 additions & 19 deletions libsyclinterface/include/dpctl_sycl_queue_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ DPCTLQueue_GetDevice(__dpctl_keep const DPCTLSyclQueueRef QRef);
* @ingroup QueueInterface
*/
DPCTL_API
DPCTLSyclEventRef
__dpctl_give DPCTLSyclEventRef
DPCTLQueue_SubmitRange(__dpctl_keep const DPCTLSyclKernelRef KRef,
__dpctl_keep const DPCTLSyclQueueRef QRef,
__dpctl_keep void **Args,
Expand Down Expand Up @@ -253,7 +253,7 @@ DPCTLQueue_SubmitRange(__dpctl_keep const DPCTLSyclKernelRef KRef,
* @ingroup QueueInterface
*/
DPCTL_API
DPCTLSyclEventRef
__dpctl_give DPCTLSyclEventRef
DPCTLQueue_SubmitNDRange(__dpctl_keep const DPCTLSyclKernelRef KRef,
__dpctl_keep const DPCTLSyclQueueRef QRef,
__dpctl_keep void **Args,
Expand Down Expand Up @@ -287,10 +287,11 @@ void DPCTLQueue_Wait(__dpctl_keep const DPCTLSyclQueueRef QRef);
* @ingroup QueueInterface
*/
DPCTL_API
DPCTLSyclEventRef DPCTLQueue_Memcpy(__dpctl_keep const DPCTLSyclQueueRef QRef,
void *Dest,
const void *Src,
size_t Count);
__dpctl_give DPCTLSyclEventRef
DPCTLQueue_Memcpy(__dpctl_keep const DPCTLSyclQueueRef QRef,
void *Dest,
const void *Src,
size_t Count);

/*!
* @brief C-API wrapper for ``sycl::queue::prefetch``.
Expand All @@ -303,9 +304,10 @@ DPCTLSyclEventRef DPCTLQueue_Memcpy(__dpctl_keep const DPCTLSyclQueueRef QRef,
* @ingroup QueueInterface
*/
DPCTL_API
DPCTLSyclEventRef DPCTLQueue_Prefetch(__dpctl_keep DPCTLSyclQueueRef QRef,
const void *Ptr,
size_t Count);
__dpctl_give DPCTLSyclEventRef
DPCTLQueue_Prefetch(__dpctl_keep DPCTLSyclQueueRef QRef,
const void *Ptr,
size_t Count);

/*!
* @brief C-API wrapper for ``sycl::queue::mem_advise``.
Expand All @@ -321,10 +323,11 @@ DPCTLSyclEventRef DPCTLQueue_Prefetch(__dpctl_keep DPCTLSyclQueueRef QRef,
* @ingroup QueueInterface
*/
DPCTL_API
DPCTLSyclEventRef DPCTLQueue_MemAdvise(__dpctl_keep DPCTLSyclQueueRef QRef,
const void *Ptr,
size_t Count,
int Advice);
__dpctl_give DPCTLSyclEventRef
DPCTLQueue_MemAdvise(__dpctl_keep DPCTLSyclQueueRef QRef,
const void *Ptr,
size_t Count,
int Advice);

/*!
* @brief C-API wrapper for sycl::queue::is_in_order that indicates whether
Expand Down Expand Up @@ -365,7 +368,7 @@ size_t DPCTLQueue_Hash(__dpctl_keep const DPCTLSyclQueueRef QRef);
* ``sycl::queue::submit_barrier()`` function.
*/
DPCTL_API
DPCTLSyclEventRef
__dpctl_give DPCTLSyclEventRef
DPCTLQueue_SubmitBarrier(__dpctl_keep const DPCTLSyclQueueRef QRef);

/*!
Expand All @@ -380,7 +383,7 @@ DPCTLQueue_SubmitBarrier(__dpctl_keep const DPCTLSyclQueueRef QRef);
* ``sycl::queue::submit_barrier()`` function.
*/
DPCTL_API
DPCTLSyclEventRef DPCTLQueue_SubmitBarrierForEvents(
__dpctl_give DPCTLSyclEventRef DPCTLQueue_SubmitBarrierForEvents(
__dpctl_keep const DPCTLSyclQueueRef QRef,
__dpctl_keep const DPCTLSyclEventRef *DepEvents,
size_t NDepEvents);
Expand All @@ -397,9 +400,101 @@ DPCTLSyclEventRef DPCTLQueue_SubmitBarrierForEvents(
* @ingroup QueueInterface
*/
DPCTL_API
DPCTLSyclEventRef DPCTLQueue_Memset(__dpctl_keep const DPCTLSyclQueueRef QRef,
void *USMRef,
uint8_t Value,
size_t Count);
__dpctl_give DPCTLSyclEventRef
DPCTLQueue_Memset(__dpctl_keep const DPCTLSyclQueueRef QRef,
void *USMRef,
uint8_t Value,
size_t Count);

/*!
* @brief C-API wrapper for ``sycl::queue::fill``.
*
* @param QRef An opaque pointer to the ``sycl::queue``.
* @param USMRef An USM pointer to the memory to fill.
* @param Value A uint8_t value to fill.
* @param Count A number of uint8_t elements to fill.
* @return An opaque pointer to the ``sycl::event`` returned by the
* ``sycl::queue::fill`` function.
* @ingroup QueueInterface
*/
DPCTL_API
__dpctl_give DPCTLSyclEventRef
DPCTLQueue_Fill8(__dpctl_keep const DPCTLSyclQueueRef QRef,
void *USMRef,
uint8_t Value,
size_t Count);

/*!
* @brief C-API wrapper for ``sycl::queue::fill``.
*
* @param QRef An opaque pointer to the ``sycl::queue``.
* @param USMRef An USM pointer to the memory to fill.
* @param Value A uint16_t value to fill.
* @param Count A number of uint16_t elements to fill.
* @return An opaque pointer to the ``sycl::event`` returned by the
* ``sycl::queue::fill`` function.
* @ingroup QueueInterface
*/
DPCTL_API
__dpctl_give DPCTLSyclEventRef
DPCTLQueue_Fill16(__dpctl_keep const DPCTLSyclQueueRef QRef,
void *USMRef,
uint16_t Value,
size_t Count);

/*!
* @brief C-API wrapper for ``sycl::queue::fill``.
*
* @param QRef An opaque pointer to the ``sycl::queue``.
* @param USMRef An USM pointer to the memory to fill.
* @param Value A uint32_t value to fill.
* @param Count A number of uint32_t elements to fill.
* @return An opaque pointer to the ``sycl::event`` returned by the
* ``sycl::queue::fill`` function.
* @ingroup QueueInterface
*/
DPCTL_API
__dpctl_give DPCTLSyclEventRef
DPCTLQueue_Fill32(__dpctl_keep const DPCTLSyclQueueRef QRef,
void *USMRef,
uint32_t Value,
size_t Count);

/*!
* @brief C-API wrapper for ``sycl::queue::fill``.
*
* @param QRef An opaque pointer to the ``sycl::queue``.
* @param USMRef An USM pointer to the memory to fill.
* @param Value A uint64_t value to fill.
* @param Count A number of uint64_t elements to fill.
* @return An opaque pointer to the ``sycl::event`` returned by the
* ``sycl::queue::fill`` function.
* @ingroup QueueInterface
*/
DPCTL_API
__dpctl_give DPCTLSyclEventRef
DPCTLQueue_Fill64(__dpctl_keep const DPCTLSyclQueueRef QRef,
void *USMRef,
uint64_t Value,
size_t Count);

/*!
* @brief C-API wrapper for ``sycl::queue::fill``.
*
* @param QRef An opaque pointer to the ``sycl::queue``.
* @param USMRef An USM pointer to the memory to fill.
* @param Value A pointer to uint64_t array of 2 elements with value
* to fill.
* @param Count A number of 128-bit elements to fill.
* @return An opaque pointer to the ``sycl::event`` returned by the
* ``sycl::queue::fill`` function.
* @ingroup QueueInterface
*/
DPCTL_API
__dpctl_give DPCTLSyclEventRef
DPCTLQueue_Fill128(__dpctl_keep const DPCTLSyclQueueRef QRef,
void *USMRef,
uint64_t *Value,
size_t Count);

DPCTL_C_EXTERN_C_END
Loading