From 6b9c335fa19cce6f1d4f4d42b2b8c8a53c4eb60d Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Fri, 16 Oct 2020 09:19:08 -0500 Subject: [PATCH 1/6] Added Cython examples 1. Cython/sycl_direct_linkage Example of native extension "cdef import"-ing sycl C++ classes directly from CL/sycl.hpp Queue is created within the function, adding measurable overhead. Extension uses GEMV to compute column-wise total of a C-contiguous matrix, and illustrates linking to oneMKL. 2. Cython/sycl_bufer Example of native extension building on the above, but illustrating getting the queue from dpctl. 3. Cython/usm_memory Example of native extension allocating USM shared memory via dpctl, and using it as a buffer underlying NumPy array. Cython functions dispatches to a SYCL code that works with USM pointer. One function populates USM memory underneath NumPy array with random numbers using ``oneapi::mkl::rng::device`` function used in SYCL kernel, with random number being parameters of European vanilla options. The second function uses SYCL to price these options using Black-Scholes formula. --- examples/cython/sycl_buffer/README.md | 80 +++++++++ .../cython/sycl_buffer/_buffer_example.pyx | 23 +++ examples/cython/sycl_buffer/bench.py | 47 +++++ examples/cython/sycl_buffer/run.py | 22 +++ examples/cython/sycl_buffer/setup.py | 56 ++++++ .../cython/sycl_buffer/use_sycl_buffer.cpp | 55 ++++++ examples/cython/sycl_buffer/use_sycl_buffer.h | 5 + .../sycl_direct_linkage/_buffer_example.pyx | 25 +++ examples/cython/sycl_direct_linkage/run.py | 11 ++ examples/cython/sycl_direct_linkage/setup.py | 56 ++++++ .../sycl_direct_linkage/sycl_function.cpp | 51 ++++++ .../sycl_direct_linkage/sycl_function.hpp | 3 + examples/cython/usm_memory/README.md | 28 +++ examples/cython/usm_memory/blackscholes.pyx | 76 ++++++++ .../usm_memory/reference_black_scholes.py | 39 ++++ examples/cython/usm_memory/run.py | 78 ++++++++ examples/cython/usm_memory/setup.py | 56 ++++++ .../cython/usm_memory/sycl_blackscholes.cpp | 170 ++++++++++++++++++ .../cython/usm_memory/sycl_blackscholes.hpp | 10 ++ examples/{ => python}/create_sycl_queues.py | 0 20 files changed, 891 insertions(+) create mode 100644 examples/cython/sycl_buffer/README.md create mode 100644 examples/cython/sycl_buffer/_buffer_example.pyx create mode 100644 examples/cython/sycl_buffer/bench.py create mode 100644 examples/cython/sycl_buffer/run.py create mode 100644 examples/cython/sycl_buffer/setup.py create mode 100644 examples/cython/sycl_buffer/use_sycl_buffer.cpp create mode 100644 examples/cython/sycl_buffer/use_sycl_buffer.h create mode 100644 examples/cython/sycl_direct_linkage/_buffer_example.pyx create mode 100644 examples/cython/sycl_direct_linkage/run.py create mode 100644 examples/cython/sycl_direct_linkage/setup.py create mode 100644 examples/cython/sycl_direct_linkage/sycl_function.cpp create mode 100644 examples/cython/sycl_direct_linkage/sycl_function.hpp create mode 100644 examples/cython/usm_memory/README.md create mode 100644 examples/cython/usm_memory/blackscholes.pyx create mode 100644 examples/cython/usm_memory/reference_black_scholes.py create mode 100644 examples/cython/usm_memory/run.py create mode 100644 examples/cython/usm_memory/setup.py create mode 100644 examples/cython/usm_memory/sycl_blackscholes.cpp create mode 100644 examples/cython/usm_memory/sycl_blackscholes.hpp rename examples/{ => python}/create_sycl_queues.py (100%) diff --git a/examples/cython/sycl_buffer/README.md b/examples/cython/sycl_buffer/README.md new file mode 100644 index 0000000000..47e682d8cb --- /dev/null +++ b/examples/cython/sycl_buffer/README.md @@ -0,0 +1,80 @@ +#1 Example of SYCL extension working NumPy array input via SYCL buffers + + +#2 Decription + +Cython function expecting a 2D array in C-contiguous layout that +computes column-wise total by using SYCL oneMKL (as GEMV call with +an all units vector). + +Example illustrates compiling SYCL extension, linking to oneMKL. + + +#2 Compiling + +``` +# make sure oneAPI is activated, $ONEAPI_ROOT must be set +CC=clang CXX=dpcpp python setup.py build_ext --inplace +``` + + +#2 Running + +``` +# SYCL_BE=PI_OPENCL sets SYCL backend to OpenCL to avoid a +# transient issue with MKL's using the default Level-0 backend +(idp) [08:16:12 ansatnuc04 simple]$ SYCL_BE=PI_OPENCL ipython +Python 3.7.7 (default, Jul 14 2020, 22:02:37) +Type 'copyright', 'credits' or 'license' for more information +IPython 7.17.0 -- An enhanced Interactive Python. Type '?' for help. + +In [1]: import syclbuffer as sb, numpy as np, dpctl + +In [2]: x = np.random.randn(10**4, 2500) + +In [3]: %time m1 = np.sum(x, axis=0) +CPU times: user 22.3 ms, sys: 160 µs, total: 22.5 ms +Wall time: 21.2 ms + +In [4]: %time m = sb.columnwise_total(x) # first time is slower, due to JIT overhead +CPU times: user 207 ms, sys: 36.1 ms, total: 243 ms +Wall time: 248 ms + +In [5]: %time m = sb.columnwise_total(x) +CPU times: user 8.89 ms, sys: 4.12 ms, total: 13 ms +Wall time: 12.4 ms + +In [6]: %time m = sb.columnwise_total(x) +CPU times: user 4.82 ms, sys: 8.06 ms, total: 12.9 ms +Wall time: 12.3 ms +``` + +Running bench.py: + +``` +========== Executing warm-up ========== +NumPy result: [1. 1. 1. ... 1. 1. 1.] +SYCL(Intel(R) Core(TM) i7-10710U CPU @ 1.10GHz) result: [1. 1. 1. ... 1. 1. 1.] +SYCL(Intel(R) Gen9 HD Graphics NEO) result: [1. 1. 1. ... 1. 1. 1.] +Times for 'opencl:cpu:0' +[2.864787499012891, 2.690436460019555, 2.5902308400254697, 2.5802528870408423, 2.538990616973024] +Times for 'opencl:gpu:0' +[1.9769684099592268, 2.3491444009705447, 2.293720397981815, 2.391633405990433, 1.9465659779962152] +Times for NumPy +[3.4011058019823395, 3.07286038500024, 3.0390414349967614, 3.0305576199898496, 3.002687797998078] +``` + +Running run.py: + +``` +(idp) [09:14:53 ansatnuc04 sycl_buffer]$ SYCL_BE=PI_OPENCL python run.py +Result computed by NumPy +[ 0.27170187 -23.36798583 7.31326489 -1.95121928] +Result computed by SYCL extension +[ 0.27170187 -23.36798583 7.31326489 -1.95121928] + +Running on: Intel(R) Gen9 HD Graphics NEO +[ 0.27170187 -23.36798583 7.31326489 -1.95121928] +Running on: Intel(R) Core(TM) i7-10710U CPU @ 1.10GHz +[ 0.27170187 -23.36798583 7.31326489 -1.95121928] +``` \ No newline at end of file diff --git a/examples/cython/sycl_buffer/_buffer_example.pyx b/examples/cython/sycl_buffer/_buffer_example.pyx new file mode 100644 index 0000000000..fcc4dc4f74 --- /dev/null +++ b/examples/cython/sycl_buffer/_buffer_example.pyx @@ -0,0 +1,23 @@ +cimport numpy as cnp +import numpy as np + +cimport dpctl as c_dpctl +import dpctl + +cdef extern from "use_sycl_buffer.h": + int c_columnwise_total(c_dpctl.DPPLSyclQueueRef q, size_t n, size_t m, double *m, double *ct) nogil + +def columnwise_total(double[:, ::1] v): + cdef cnp.ndarray res_array = np.empty((v.shape[1],), dtype='d') + cdef double[::1] res_memslice = res_array + cdef int ret_status + cdef c_dpctl.SyclQueue q + cdef c_dpctl.DPPLSyclQueueRef q_ref + + q = c_dpctl.get_current_queue() + q_ref = q.get_queue_ref() + + with nogil: + ret_status = c_columnwise_total(q_ref, v.shape[0], v.shape[1], &v[0,0], &res_memslice[0]) + + return res_array diff --git a/examples/cython/sycl_buffer/bench.py b/examples/cython/sycl_buffer/bench.py new file mode 100644 index 0000000000..7a148662a7 --- /dev/null +++ b/examples/cython/sycl_buffer/bench.py @@ -0,0 +1,47 @@ +import dpctl +import syclbuffer as sb +import numpy as np + +X = np.full((10**4, 4098), 1e-4, dtype='d') + +# warm-up +print("=" * 10 + " Executing warm-up " + "=" * 10) +print("NumPy result: ", X.sum(axis=0)) + +dpctl.set_default_queue('opencl', 'cpu', 0) +print("SYCL({}) result: {}".format( + dpctl.get_current_queue().get_sycl_device().get_device_name(), + sb.columnwise_total(X)) +) + +dpctl.set_default_queue('opencl', 'gpu', 0) +print("SYCL({}) result: {}".format( + dpctl.get_current_queue().get_sycl_device().get_device_name(), + sb.columnwise_total(X)) +) + +import timeit + +print("Times for 'opencl:cpu:0'") +print(timeit.repeat( + stmt = 'sb.columnwise_total(X)', + setup = 'dpctl.set_default_queue("opencl", "cpu", 0); ' + 'sb.columnwise_total(X)', # ensure JIT compilation is not counted + number = 100, + globals=globals() +)) + +print("Times for 'opencl:gpu:0'") +print(timeit.repeat( + stmt = 'sb.columnwise_total(X)', + setup = 'dpctl.set_default_queue("opencl", "gpu", 0); sb.columnwise_total(X)', + number = 100, + globals=globals() +)) + +print("Times for NumPy") +print(timeit.repeat( + stmt = 'X.sum(axis=0)', + number=100, + globals=globals() +)) diff --git a/examples/cython/sycl_buffer/run.py b/examples/cython/sycl_buffer/run.py new file mode 100644 index 0000000000..4049e173c0 --- /dev/null +++ b/examples/cython/sycl_buffer/run.py @@ -0,0 +1,22 @@ +import syclbuffer as sb +import numpy as np + +X = np.random.randn(100, 4) + +print("Result computed by NumPy") +print(X.sum(axis=0)) +print("Result computed by SYCL extension") +print(sb.columnwise_total(X)) + + +print("") +# controlling where to offload +import dpctl + +with dpctl.device_context('opencl:gpu'): + print("Running on: ", dpctl.get_current_queue().get_sycl_device().get_device_name()) + print(sb.columnwise_total(X)) + +with dpctl.device_context('opencl:cpu'): + print("Running on: ", dpctl.get_current_queue().get_sycl_device().get_device_name()) + print(sb.columnwise_total(X)) diff --git a/examples/cython/sycl_buffer/setup.py b/examples/cython/sycl_buffer/setup.py new file mode 100644 index 0000000000..57acd4781f --- /dev/null +++ b/examples/cython/sycl_buffer/setup.py @@ -0,0 +1,56 @@ +import sys +from os.path import join, exists, abspath, dirname +from os import getcwd +from os import environ +from Cython.Build import cythonize + + +def configuration(parent_package='', top_path=None): + from numpy.distutils.misc_util import Configuration + from numpy.distutils.system_info import get_info + import numpy as np + import dpctl + + config = Configuration('', parent_package, top_path) + + oneapi_root = environ.get('ONEAPI_ROOT', None) + if not oneapi_root: + raise ValueError("ONEAPI_ROOT must be set, typical value is /opt/intel/oneapi") + + mkl_info = { + 'include_dirs': [join(oneapi_root, 'mkl', 'include')], + 'library_dirs': [join(oneapi_root, 'mkl', 'lib'), join(oneapi_root, 'mkl', 'lib', 'intel64')], + 'libraries': ['mkl_sycl', 'mkl_intel_ilp64', 'mkl_tbb_thread', 'mkl_core', 'tbb', 'iomp5'] + } + + mkl_include_dirs = mkl_info.get('include_dirs') + mkl_library_dirs = mkl_info.get('library_dirs') + mkl_libraries = mkl_info.get('libraries') + + pdir = dirname(__file__) + wdir = join(pdir) + + eca = ['-Wall', '-Wextra', '-fsycl', '-fsycl-unnamed-lambda'] + + config.add_extension( + name='syclbuffer', + sources=[ + join(pdir, '_buffer_example.pyx'), + join(wdir, 'use_sycl_buffer.cpp'), + join(wdir, 'use_sycl_buffer.h') + ], + include_dirs=[wdir, np.get_include(), dpctl.get_include()] + mkl_include_dirs, + libraries=['sycl'] + mkl_libraries, + runtime_library_dirs=mkl_library_dirs, + extra_compile_args=eca, # + ['-O0', '-g', '-ggdb'], + extra_link_args=['-fPIC'], + language='c++' + ) + + config.ext_modules = cythonize(config.ext_modules, include_path=[pdir, wdir]) + return config + + +if __name__ == '__main__': + from numpy.distutils.core import setup + setup(configuration=configuration) diff --git a/examples/cython/sycl_buffer/use_sycl_buffer.cpp b/examples/cython/sycl_buffer/use_sycl_buffer.cpp new file mode 100644 index 0000000000..7042143538 --- /dev/null +++ b/examples/cython/sycl_buffer/use_sycl_buffer.cpp @@ -0,0 +1,55 @@ +#include +#include "use_sycl_buffer.h" +#include "mkl_blas_sycl.hpp" +#include "mkl.h" +#include "dppl_sycl_types.h" + +int +c_columnwise_total(DPPLSyclQueueRef q_ref, size_t n, size_t m, double *mat, double *ct) { + + sycl::queue q = *(reinterpret_cast(q_ref)); + + sycl::buffer mat_buffer = sycl::buffer(mat, sycl::range<1>(n * m)); + sycl::buffer ct_buffer = sycl::buffer(ct, sycl::range<1>(m)); + + double *ones = reinterpret_cast(malloc(n * sizeof(double))); + { + sycl::buffer ones_buffer = sycl::buffer(ones, sycl::range<1>(n)); + + try { + auto ev = q.submit([&](sycl::handler &cgh) { + auto ones_acc = ones_buffer.get_access(cgh); + cgh.fill(ones_acc, double(1.0)); + }); + + ev.wait_and_throw(); + } + catch (sycl::exception const& e) { + std::cout << "\t\tCaught synchronous SYCL exception during fill:\n" + << e.what() << std::endl << "OpenCL status: " << e.get_cl_code() << std::endl; + goto cleanup; + } + + try { + oneapi::mkl::blas::row_major::gemv( + q, + oneapi::mkl::transpose::trans, + n, m, double(1.0), mat_buffer, m, + ones_buffer, 1, + double(0.0), ct_buffer, 1); + q.wait(); + } + catch (sycl::exception const &e) { + std::cout << "\t\tCaught synchronous SYCL exception during GEMV:\n" + << e.what() << std::endl << "OpenCL status: " << e.get_cl_code() << std::endl; + goto cleanup; + } + } + + free(ones); + return 0; + + cleanup: + free(ones); + return -1; +} diff --git a/examples/cython/sycl_buffer/use_sycl_buffer.h b/examples/cython/sycl_buffer/use_sycl_buffer.h new file mode 100644 index 0000000000..2c403879bb --- /dev/null +++ b/examples/cython/sycl_buffer/use_sycl_buffer.h @@ -0,0 +1,5 @@ +#include +#include "dppl_sycl_types.h" + +extern int c_columnwise_total( + DPPLSyclQueueRef q, size_t n, size_t m, double *mat, double *ct); diff --git a/examples/cython/sycl_direct_linkage/_buffer_example.pyx b/examples/cython/sycl_direct_linkage/_buffer_example.pyx new file mode 100644 index 0000000000..e9aca0fcfe --- /dev/null +++ b/examples/cython/sycl_direct_linkage/_buffer_example.pyx @@ -0,0 +1,25 @@ +cimport numpy as cnp +import numpy as np +from cython.operator cimport dereference as deref + +cdef extern from "CL/sycl.hpp" namespace "cl::sycl": + cdef cppclass queue nogil: + pass + +cdef extern from "sycl_function.hpp": + int c_columnwise_total(queue& q, size_t n, size_t m, double *m, double *ct) nogil + +def columnwise_total(double[:, ::1] v): + cdef cnp.ndarray res_array = np.empty((v.shape[1],), dtype='d') + cdef double[::1] res_memslice = res_array + cdef int ret_status + cdef queue* q + + q = new queue() + + with nogil: + ret_status = c_columnwise_total(deref(q), v.shape[0], v.shape[1], &v[0,0], &res_memslice[0]) + + del q + + return res_array diff --git a/examples/cython/sycl_direct_linkage/run.py b/examples/cython/sycl_direct_linkage/run.py new file mode 100644 index 0000000000..c147ec056c --- /dev/null +++ b/examples/cython/sycl_direct_linkage/run.py @@ -0,0 +1,11 @@ +import syclbuffer as sb +import numpy as np + +X = np.random.randn(20, 10) + +# compute column-wise total with NumPy's own host code +print(X.sum(axis=0)) + +# compute column-wise total with SYCL extension +print(sb.columnwise_total(X)) + diff --git a/examples/cython/sycl_direct_linkage/setup.py b/examples/cython/sycl_direct_linkage/setup.py new file mode 100644 index 0000000000..ceee314a12 --- /dev/null +++ b/examples/cython/sycl_direct_linkage/setup.py @@ -0,0 +1,56 @@ +import sys +from os.path import join, exists, abspath, dirname +from os import getcwd +from os import environ +from Cython.Build import cythonize + + +def configuration(parent_package='', top_path=None): + from numpy.distutils.misc_util import Configuration + from numpy.distutils.system_info import get_info + import numpy as np + import dpctl + + config = Configuration('', parent_package, top_path) + + oneapi_root = environ.get('ONEAPI_ROOT', None) + if not oneapi_root: + raise ValueError("ONEAPI_ROOT must be set, typical value is /opt/intel/oneapi") + + mkl_info = { + 'include_dirs': [join(oneapi_root, 'mkl', 'include')], + 'library_dirs': [join(oneapi_root, 'mkl', 'lib'), join(oneapi_root, 'mkl', 'lib', 'intel64')], + 'libraries': ['mkl_sycl', 'mkl_intel_ilp64', 'mkl_tbb_thread', 'mkl_core', 'tbb', 'iomp5'] + } + + mkl_include_dirs = mkl_info.get('include_dirs') + mkl_library_dirs = mkl_info.get('library_dirs') + mkl_libraries = mkl_info.get('libraries') + + pdir = dirname(__file__) + wdir = join(pdir) + + eca = ['-Wall', '-Wextra', '-fsycl', '-fsycl-unnamed-lambda'] + + config.add_extension( + name='syclbuffer_naive', + sources=[ + join(pdir, '_buffer_example.pyx'), + join(pdir, 'sycl_function.cpp'), + join(pdir, 'sycl_function.hpp') + ], + include_dirs=[wdir, np.get_include(), dpctl.get_include()] + mkl_include_dirs, + libraries=['sycl'] + mkl_libraries, + runtime_library_dirs=mkl_library_dirs, + extra_compile_args=eca, # + ['-O0', '-g', '-ggdb'], + extra_link_args=['-fPIC'], + language='c++' + ) + + config.ext_modules = cythonize(config.ext_modules, include_path=[pdir, wdir]) + return config + + +if __name__ == '__main__': + from numpy.distutils.core import setup + setup(configuration=configuration) diff --git a/examples/cython/sycl_direct_linkage/sycl_function.cpp b/examples/cython/sycl_direct_linkage/sycl_function.cpp new file mode 100644 index 0000000000..d9d8065f3e --- /dev/null +++ b/examples/cython/sycl_direct_linkage/sycl_function.cpp @@ -0,0 +1,51 @@ +#include +#include "sycl_function.hpp" +#include "mkl_blas_sycl.hpp" +#include "mkl.h" + +int c_columnwise_total(cl::sycl::queue &q, size_t n, size_t m, double *mat, double *ct) { + sycl::buffer mat_buffer = sycl::buffer(mat, sycl::range<1>(n * m)); + sycl::buffer ct_buffer = sycl::buffer(ct, sycl::range<1>(m)); + + double *ones = reinterpret_cast(malloc(n * sizeof(double))); + { + sycl::buffer ones_buffer = sycl::buffer(ones, sycl::range<1>(n)); + + try { + auto ev = q.submit([&](sycl::handler &cgh) { + auto ones_acc = ones_buffer.get_access(cgh); + cgh.fill(ones_acc, double(1.0)); + }); + + ev.wait_and_throw(); + } + catch (sycl::exception const& e) { + std::cout << "\t\tCaught synchronous SYCL exception during fill:\n" + << e.what() << std::endl << "OpenCL status: " << e.get_cl_code() << std::endl; + goto cleanup; + } + + try { + oneapi::mkl::blas::row_major::gemv( + q, + oneapi::mkl::transpose::trans, + n, m, double(1.0), mat_buffer, m, + ones_buffer, 1, + double(0.0), ct_buffer, 1); + q.wait(); + } + catch (sycl::exception const &e) { + std::cout << "\t\tCaught synchronous SYCL exception during GEMV:\n" + << e.what() << std::endl << "OpenCL status: " << e.get_cl_code() << std::endl; + goto cleanup; + } + } + + free(ones); + return 0; + + cleanup: + free(ones); + return -1; +} + diff --git a/examples/cython/sycl_direct_linkage/sycl_function.hpp b/examples/cython/sycl_direct_linkage/sycl_function.hpp new file mode 100644 index 0000000000..51e5e8474b --- /dev/null +++ b/examples/cython/sycl_direct_linkage/sycl_function.hpp @@ -0,0 +1,3 @@ +#include + +int c_columnwise_total(cl::sycl::queue&, size_t n, size_t m, double *mat, double *ct); diff --git a/examples/cython/usm_memory/README.md b/examples/cython/usm_memory/README.md new file mode 100644 index 0000000000..be3a7c6ce4 --- /dev/null +++ b/examples/cython/usm_memory/README.md @@ -0,0 +1,28 @@ +#1 Example of working with USM memory + +#2 Description + +#2 Building + +Make sure oneAPI is activated. Environment variable `$ONEAPI_ROOT` must be set. + + +``` +$ CC=clang CXX=dpcpp LD_SHARED="dpcpp -shared" python setup.py build_ext --inplace +``` + +#2 Running + +``` +$ python run.py +``` + +which gives sample output: + +``` +True +Using : Intel(R) Core(TM) i7-10710U CPU @ 1.10GHz +Elapsed: 0.9255791641771793 +Using : Intel(R) Gen9 +Elapsed: 0.32811625860631466 +``` \ No newline at end of file diff --git a/examples/cython/usm_memory/blackscholes.pyx b/examples/cython/usm_memory/blackscholes.pyx new file mode 100644 index 0000000000..6495265461 --- /dev/null +++ b/examples/cython/usm_memory/blackscholes.pyx @@ -0,0 +1,76 @@ +# cython: language_level=3 +# distutils: language=c++ + +cimport dpctl as c_dpctl +cimport dpctl._memory as c_dpctl_mem +cimport numpy as cnp +from cython cimport floating + +import dpctl +import numpy as np + +cdef extern from "sycl_blackscholes.hpp": + cdef void cpp_blackscholes[T](c_dpctl.DPPLSyclQueueRef, size_t n_opts, T* option_params, T* callput) + cdef void cpp_populate_params[T](c_dpctl.DPPLSyclQueueRef, size_t n_opts, T* option_params, T pl, T ph, T sl, T sh, T tl, T th, T rl, T rh, T vl, T vh, int seed) + +def black_scholes_price(floating[:, ::1] option_params): + cdef size_t n_opts = option_params.shape[0] + cdef size_t n_params = option_params.shape[1] + cdef c_dpctl.SyclQueue q + cdef c_dpctl.DPPLSyclQueueRef q_ptr + cdef c_dpctl_mem.MemoryUSMShared mobj + cdef floating[:, :] call_put_prices + cdef cnp.ndarray callput_arr + cdef double* dp1 + cdef double* dp2 + cdef float* fp1 + cdef float* fp2 + + if (n_params != 5): + raise ValueError(( + "Array of option parameters has unexpected number of columns {} != 5. " + "Each row must specify (current_price, strike_price, maturity, interest_rate, volatility)." + ).format(n_params)) + + q = c_dpctl.get_current_queue() + q_ptr = q.get_queue_ref() + if (floating is double): + mobj = c_dpctl_mem.MemoryUSMShared(nbytes=2*n_opts * sizeof(double)) + callput_arr = np.ndarray((n_opts, 2), buffer=mobj, dtype='d') + call_put_prices = callput_arr + dp1 = &option_params[0,0] + dp2 = &call_put_prices[0,0]; + cpp_blackscholes[double](q_ptr, n_opts, dp1, dp2) + elif (floating is float): + mobj = c_dpctl_mem.MemoryUSMShared(nbytes=2*n_opts * sizeof(float)) + callput_arr = np.ndarray((n_opts, 2), buffer=mobj, dtype='f') + call_put_prices = callput_arr + fp1 = &option_params[0,0] + fp2 = &call_put_prices[0,0] + cpp_blackscholes[float](q_ptr, n_opts, fp1, fp2) + + return callput_arr + +def populate_params(floating[:, ::1] option_params, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, int seed): + cdef size_t n_opts = option_params.shape[0] + cdef size_t n_params = option_params.shape[1] + + cdef c_dpctl.SyclQueue q + cdef c_dpctl.DPPLSyclQueueRef q_ptr + cdef double* dp + cdef float* fp + + if (n_params != 5): + raise ValueError(( + "Array of option parameters has unexpected number of columns {} != 5. " + "Each row must specify (current_price, strike_price, maturity, interest_rate, volatility)." + ).format(n_params)) + + q = c_dpctl.get_current_queue() + q_ptr = q.get_queue_ref() + if (floating is double): + dp = &option_params[0,0] + cpp_populate_params[double](q_ptr, n_opts, dp, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, seed) + elif (floating is float): + fp = &option_params[0,0] + cpp_populate_params[float](q_ptr, n_opts, fp, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, seed) diff --git a/examples/cython/usm_memory/reference_black_scholes.py b/examples/cython/usm_memory/reference_black_scholes.py new file mode 100644 index 0000000000..bc2ef8a91f --- /dev/null +++ b/examples/cython/usm_memory/reference_black_scholes.py @@ -0,0 +1,39 @@ +import math + +def ref_python_black_scholes(price, strike, t, rate, vol): + mr = -rate + sig_sig_two = vol * vol * 2 + + P = price + S = strike + T = t + + a = math.log(P / S) + b = T * mr + + z = T * sig_sig_two + c = 0.25 * z + y = 1 / math.sqrt(z) + + Se = math.exp(b) * S + + w1 = (a - b + c) * y + w2 = (a - b - c) * y + + if (w1 > 0): + d1 = 0.5 * math.erfc(-w1) + d1c = 1.0 - d1 + else: + d1c = 0.5 * math.erfc(w1) + d1 = 1.0 - d1c + if (w2 > 0): + d2 = 0.5 * math.erfc(-w2) + d2c = 1.0 - d2 + else: + d2c = 0.5 * math.erfc(w2) + d2 = 1.0 - d2c + + + call = P * d1 - Se * d2 + put = Se * d2c - P * d1c + return (call, put) diff --git a/examples/cython/usm_memory/run.py b/examples/cython/usm_memory/run.py new file mode 100644 index 0000000000..5e36b9b0ef --- /dev/null +++ b/examples/cython/usm_memory/run.py @@ -0,0 +1,78 @@ +# coding: utf-8 +import dpctl._memory as dpctl_mem +import blackscholes_usm as bs +import numpy as np, dpctl +from reference_black_scholes import ref_python_black_scholes + +def gen_option_params(n_opts, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, dtype): + usm_mem = dpctl_mem.MemoryUSMShared(n_opts * 5 * np.dtype(dtype).itemsize) + # usm_mem2 = dpctl_mem.MemoryUSMDevice(n_opts * 5 * np.dtype(dtype).itemsize) + params = np.ndarray(shape = (n_opts, 5), buffer=usm_mem, dtype=dtype) + seed = 1234 + bs.populate_params( + params, + pl, ph, + sl, sh, + tl, th, + rl, rh, + vl, vh, + seed + ) + return params + +# ==== dry run === +usm_mem = dpctl_mem.MemoryUSMShared(3*5*np.dtype('d').itemsize) +opts = np.ndarray((3, 5), buffer=usm_mem, dtype='d') +# copy from Host NumPy to USM buffer +opts[:,:] = np.array([ + [81.2, 81.8, 29, 0.01, 0.02], + [24.24, 22.1, 10, 0.02, 0.08], + [100, 100, 30, 0.01, 0.12] +]) +# GPU computation +Xgpu = bs.black_scholes_price(opts) + +# compute prices in CPython +X_ref = np.array([ref_python_black_scholes(*opt) for opt in opts], dtype='d') + +print(np.allclose(Xgpu, X_ref, atol=1e-5)) + +n_opts = 3*10**6 + +# compute on CPU sycl device +import timeit + +for _ in range(3): + + dpctl.set_default_queue('opencl', 'cpu', 0) + print("Using : {}".format(dpctl.get_current_queue().get_sycl_device().get_device_name())) + + t0 = timeit.default_timer() + opts1 = gen_option_params(n_opts, + 20.0, 30.0, + 22.0, 29.0, + 18.0, 24.0, + 0.01, 0.05, + 0.01, 0.05, 'd') + X1 = bs.black_scholes_price(opts1) + t1 = timeit.default_timer() + + print("Elapsed: {}".format(t1-t0)) + + # compute on GPU sycl device + dpctl.set_default_queue('level0', 'gpu', 0) + print("Using : {}".format(dpctl.get_current_queue().get_sycl_device().get_device_name())) + + t0 = timeit.default_timer() + opts2 = gen_option_params(n_opts, + 20.0, 30.0, + 22.0, 29.0, + 18.0, 24.0, + 0.01, 0.05, + 0.01, 0.05, 'd') + X2 = bs.black_scholes_price(opts2) + t1 = timeit.default_timer() + print("Elapsed: {}".format(t1-t0)) + +print(np.abs(opts1-opts2).max()) +print(np.abs(X2-X1).max()) diff --git a/examples/cython/usm_memory/setup.py b/examples/cython/usm_memory/setup.py new file mode 100644 index 0000000000..da71d08a7e --- /dev/null +++ b/examples/cython/usm_memory/setup.py @@ -0,0 +1,56 @@ +import sys +from os.path import join, exists, abspath, dirname +from os import getcwd +from os import environ +from Cython.Build import cythonize + + +def configuration(parent_package='', top_path=None): + from numpy.distutils.misc_util import Configuration + from numpy.distutils.system_info import get_info + import numpy as np + import dpctl + + config = Configuration('', parent_package, top_path) + + oneapi_root = environ.get('ONEAPI_ROOT', None) + if not oneapi_root: + raise ValueError("ONEAPI_ROOT must be set, typical value is /opt/intel/oneapi") + + mkl_info = { + 'include_dirs': [join(oneapi_root, 'mkl', 'include')], + 'library_dirs': [join(oneapi_root, 'mkl', 'lib'), join(oneapi_root, 'mkl', 'lib', 'intel64')], + 'libraries': ['mkl_sycl', 'mkl_intel_ilp64', 'mkl_tbb_thread', 'mkl_core', 'tbb', 'iomp5'] + } + + mkl_include_dirs = mkl_info.get('include_dirs') + mkl_library_dirs = mkl_info.get('library_dirs') + mkl_libraries = mkl_info.get('libraries') + + pdir = dirname(__file__) + wdir = join(pdir) + + eca = ['-Wall', '-Wextra', '-fsycl', '-fsycl-unnamed-lambda'] + + config.add_extension( + name='blackscholes_usm', + sources=[ + join(pdir, 'blackscholes.pyx'), + join(wdir, 'sycl_blackscholes.cpp'), + join(wdir, 'sycl_blackscholes.hpp') + ], + include_dirs=[wdir, np.get_include(), dpctl.get_include()] + mkl_include_dirs, + libraries=['sycl'] + mkl_libraries, + runtime_library_dirs=mkl_library_dirs, + extra_compile_args=eca, # + ['-O0', '-g', '-ggdb'], + extra_link_args=['-fPIC'], + language='c++' + ) + + config.ext_modules = cythonize(config.ext_modules, include_path=[pdir, wdir]) + return config + + +if __name__ == '__main__': + from numpy.distutils.core import setup + setup(configuration=configuration) diff --git a/examples/cython/usm_memory/sycl_blackscholes.cpp b/examples/cython/usm_memory/sycl_blackscholes.cpp new file mode 100644 index 0000000000..550a01c622 --- /dev/null +++ b/examples/cython/usm_memory/sycl_blackscholes.cpp @@ -0,0 +1,170 @@ +#include +#include "dppl_sycl_types.h" +#include "sycl_blackscholes.hpp" +#include "mkl_rng_sycl_device.hpp" + +template +class black_scholes_kernel; + +constexpr int n_params = 5; +constexpr int n_params_next_pow2 = 8; + +constexpr int n_prices = 2; +constexpr int PRICE = 0; +constexpr int STRIKE = 1; +constexpr int MATURITY = 2; +constexpr int RATE = 3; +constexpr int VOLATILITY = 4; +constexpr int CALL = 0; +constexpr int PUT = 1; + +template +extern void cpp_blackscholes(DPPLSyclQueueRef q_ptr, size_t n_opts, T* params, T* callput) { + using data_t = T; + + sycl::queue q = *(reinterpret_cast(q_ptr)); + + auto ctx = q.get_context(); + { + sycl::usm::alloc params_type = sycl::get_pointer_type(params, ctx); + if (params_type != sycl::usm::alloc::shared) { + throw std::runtime_error("Input option_params to cpp_blackscholes is not a USM-shared pointer."); + } + } + { + sycl::usm::alloc callput_type = sycl::get_pointer_type(callput, ctx); + if (callput_type != sycl::usm::alloc::shared) { + throw std::runtime_error("Input callput to cpp_blackscholes is not a USM-shared pointer."); + } + } + + auto e = q.submit( + [&](sycl::handler &cgh){ + + data_t zero = data_t(0), one = data_t(1), two = data_t(2); + data_t quarter = one / data_t(4); + data_t half = one / two; + + cgh.parallel_for>( + sycl::range<1>(n_opts), + [=](sycl::id<1> idx) { + const size_t i = n_params * idx[0]; + const data_t opt_price = params[i + PRICE]; + const data_t opt_strike = params[i + STRIKE]; + const data_t opt_maturity = params[i + MATURITY]; + const data_t opt_rate = params[i + RATE]; + const data_t opt_volatility = params[i + VOLATILITY]; + data_t a, b, c, y, z, e, d1, d1c, d2, d2c, w1, w2; + data_t mr = -opt_rate, sig_sig_two = two * opt_volatility * opt_volatility; + + a = cl::sycl::log( opt_price / opt_strike ); + b = opt_maturity * mr; + z = opt_maturity * sig_sig_two; + + c = quarter * z; + e = cl::sycl::exp( b ); + y = cl::sycl::rsqrt( z ); + + a = b - a; + w1 = ( a - c ) * y; + w2 = ( a + c ) * y; + + if (w1 < zero) { + d1 = cl::sycl::erfc(w1) * half; + d1c = one - d1; + } else { + d1c = cl::sycl::erfc(-w1) * half; + d1 = one - d1c; + } + if (w2 < zero) { + d2 = cl::sycl::erfc(w2) * half; + d2c = one - d2; + } else { + d2c = cl::sycl::erfc(-w2) * half; + d2 = one - d2c; + } + + e *= opt_strike; + data_t call_price = opt_price * d1 - e * d2; + data_t put_price = e * d2c - opt_price * d1c; + + const size_t callput_i = n_prices * idx[0]; + callput[callput_i + CALL] = call_price; + callput[callput_i + PUT ] = put_price; + }); + }); + + e.wait_and_throw(); + + return; +} + +template +void cpp_populate_params(DPPLSyclQueueRef q_ptr, size_t n_opts, T* params, T pl, T ph, T sl, T sh, T tl, T th, T rl, T rh, T vl, T vh, int seed) { + sycl::queue q = *(reinterpret_cast(q_ptr)); + + auto ctx = q.get_context(); + { + sycl::usm::alloc params_type = sycl::get_pointer_type(params, ctx); + if (params_type != sycl::usm::alloc::shared) { + throw std::runtime_error("Input option_params to cpp_blackscholes is not a USM-shared pointer."); + } + } + + sycl::event e = q.submit( + [&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::range<1>(n_opts), + [=](sycl::item<1> idx) { + size_t i = n_params * idx.get_id(0); + size_t j = n_params_next_pow2 * idx.get_id(0); + + // create engine to sample 5 parameters per workers + oneapi::mkl::rng::device::philox4x32x10 engine(seed, j); + oneapi::mkl::rng::device::uniform distr; + + sycl::vec res = oneapi::mkl::rng::device::generate(distr, engine); + + { + const int pos = PRICE; + auto u = res[pos]; + params[i + pos] = pl * u + ph * (T(1)-u); + } + { + const int pos = STRIKE; + auto u = res[pos]; + params[i + pos] = sl * u + sh * (T(1)-u); + } + { + const int pos = MATURITY; + auto u = res[pos]; + params[i + pos] = tl * u + th * (T(1)-u); + } + { + const int pos = RATE; + auto u = res[pos]; + params[i + pos] = rl * u + rh * (T(1)-u); + } + { + const int pos = VOLATILITY; + auto u = res[pos]; + params[i + pos] = vl * u + vh * (T(1)-u); + } + }); + }); + + e.wait_and_throw(); +} + +// instantation for object files to not be empty + +template void cpp_blackscholes(DPPLSyclQueueRef q_ptr, size_t n_opts, double* params, double* callput); +template void cpp_blackscholes(DPPLSyclQueueRef q_ptr, size_t n_opts, float* params, float* callput); + + +template void cpp_populate_params(DPPLSyclQueueRef q_ptr, size_t n_opts, double* params, + double pl, double ph, double sl, double sh, double tl, double th, + double rl, double rh, double vl, double vh, int seed); +template void cpp_populate_params(DPPLSyclQueueRef q_ptr, size_t n_opts, float* params, + float pl, float ph, float sl, float sh, float tl, float th, + float rl, float rh, float vl, float vh, int seed); diff --git a/examples/cython/usm_memory/sycl_blackscholes.hpp b/examples/cython/usm_memory/sycl_blackscholes.hpp new file mode 100644 index 0000000000..36594810dc --- /dev/null +++ b/examples/cython/usm_memory/sycl_blackscholes.hpp @@ -0,0 +1,10 @@ +#include +#include "dppl_sycl_types.h" + +template +extern void cpp_blackscholes(DPPLSyclQueueRef q, size_t n_opts, T* params, T* callput); + +template +extern void cpp_populate_params(DPPLSyclQueueRef q, size_t n_opts, T* params, + T pl, T ph, T sl, T sh, T tl, T th, T rl, T rh, T vl, T vh, + int seed); diff --git a/examples/create_sycl_queues.py b/examples/python/create_sycl_queues.py similarity index 100% rename from examples/create_sycl_queues.py rename to examples/python/create_sycl_queues.py From 97066c9e1d21dbaaa4bcfbdb569d3b2cba3915eb Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Fri, 6 Nov 2020 11:05:01 -0600 Subject: [PATCH 2/6] updated create_sycl_queues.py to run on current dpctl --- examples/python/create_sycl_queues.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/examples/python/create_sycl_queues.py b/examples/python/create_sycl_queues.py index 6fc6cdc9fa..bf343bae4c 100644 --- a/examples/python/create_sycl_queues.py +++ b/examples/python/create_sycl_queues.py @@ -1,10 +1,11 @@ from __future__ import print_function -from dpctl import runtime, device_context, device_type +import dpctl +from dpctl import device_context, device_type # Global runtime object inside dpctl -rt = runtime +rt = dpctl # Print metadata about the runtime rt.dump() @@ -19,11 +20,11 @@ # the with device_context scope gets reset to what ever context was set # at entry of the scope. For this case, the context would go back to the # default context -with device_context(device_type.cpu, 0) as cpu_queue: +with device_context('opencl:cpu:0') as cpu_queue: print("========================================") print("Current context inside with scope") print("========================================") - rt.dump_queue(cpu_queue) + cpu_queue.get_sycl_device().dump_device_info() # Note the current context can be either directly accessed by using # the "cpu_queue" object, or it can be accessed via the runtime's @@ -31,10 +32,10 @@ print("========================================") print("Looking up current context using runtime") print("========================================") - rt.dump_queue(rt.get_current_queue()) + rt.get_current_queue().get_sycl_device().dump_device_info() print("========================================") print("Current context after exiting with scope") print("========================================") -rt.dump_queue(rt.get_current_queue()) +rt.get_current_queue().get_sycl_device().dump_device_info() From d541443a8181d48b8162be293d898f55ce2ec416 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 16 Nov 2020 16:45:37 -0600 Subject: [PATCH 3/6] Extended sycl_buffer example to implement column-wise summation without MKL --- .../cython/sycl_buffer/_buffer_example.pyx | 11 +++- .../cython/sycl_buffer/use_sycl_buffer.cpp | 58 ++++++++++++++++++- examples/cython/sycl_buffer/use_sycl_buffer.h | 2 + 3 files changed, 66 insertions(+), 5 deletions(-) diff --git a/examples/cython/sycl_buffer/_buffer_example.pyx b/examples/cython/sycl_buffer/_buffer_example.pyx index fcc4dc4f74..d1ade59c92 100644 --- a/examples/cython/sycl_buffer/_buffer_example.pyx +++ b/examples/cython/sycl_buffer/_buffer_example.pyx @@ -6,8 +6,9 @@ import dpctl cdef extern from "use_sycl_buffer.h": int c_columnwise_total(c_dpctl.DPPLSyclQueueRef q, size_t n, size_t m, double *m, double *ct) nogil + int c_columnwise_total_no_mkl(c_dpctl.DPPLSyclQueueRef q, size_t n, size_t m, double *m, double *ct) nogil -def columnwise_total(double[:, ::1] v): +def columnwise_total(double[:, ::1] v, method='mkl'): cdef cnp.ndarray res_array = np.empty((v.shape[1],), dtype='d') cdef double[::1] res_memslice = res_array cdef int ret_status @@ -17,7 +18,11 @@ def columnwise_total(double[:, ::1] v): q = c_dpctl.get_current_queue() q_ref = q.get_queue_ref() - with nogil: - ret_status = c_columnwise_total(q_ref, v.shape[0], v.shape[1], &v[0,0], &res_memslice[0]) + if method == 'mkl': + with nogil: + ret_status = c_columnwise_total(q_ref, v.shape[0], v.shape[1], &v[0,0], &res_memslice[0]) + else: + with nogil: + ret_status = c_columnwise_total_no_mkl(q_ref, v.shape[0], v.shape[1], &v[0,0], &res_memslice[0]) return res_array diff --git a/examples/cython/sycl_buffer/use_sycl_buffer.cpp b/examples/cython/sycl_buffer/use_sycl_buffer.cpp index 7042143538..0c42332ea7 100644 --- a/examples/cython/sycl_buffer/use_sycl_buffer.cpp +++ b/examples/cython/sycl_buffer/use_sycl_buffer.cpp @@ -1,7 +1,6 @@ #include #include "use_sycl_buffer.h" -#include "mkl_blas_sycl.hpp" -#include "mkl.h" +#include #include "dppl_sycl_types.h" int @@ -53,3 +52,58 @@ c_columnwise_total(DPPLSyclQueueRef q_ref, size_t n, size_t m, double *mat, doub free(ones); return -1; } + +inline size_t upper_multiple(size_t n, size_t wg) { return wg * ((n + wg - 1)/wg); } + +int +c_columnwise_total_no_mkl(DPPLSyclQueueRef q_ref, size_t n, size_t m, double *mat, double *ct) { + + sycl::queue q = *(reinterpret_cast(q_ref)); + + sycl::buffer mat_buffer = sycl::buffer(mat, sycl::range<2>(n, m)); + sycl::buffer ct_buffer = sycl::buffer(ct, sycl::range<1>(m)); + + auto e = q.submit( + [&](sycl::handler &h) { + sycl::accessor ct_acc {ct_buffer, h, sycl::write_only}; + h.parallel_for( + sycl::range<1>(m), + [=](sycl::id<1> i){ + ct_acc[i] = 0.0; + }); + }); + + constexpr size_t wg = 256; + auto e2 = q.submit( + [&](sycl::handler &h) { + + sycl::accessor mat_acc {mat_buffer, h, sycl::read_only}; + sycl::accessor ct_acc {ct_buffer, h}; + h.depends_on(e); + + sycl::range<2> global {upper_multiple(n, wg), m}; + sycl::range<2> local {wg, 1}; + + h.parallel_for( + sycl::nd_range<2>(global, local), + [=](sycl::nd_item<2> it) { + size_t i = it.get_global_id(0); + size_t j = it.get_global_id(1); + double group_sum = sycl::ONEAPI::reduce( + it.get_group(), + (i < n) ? mat_acc[it.get_global_id()] : 0.0, + std::plus() + ); + if (it.get_local_id(0) == 0) { + sycl::ONEAPI::atomic_ref< + double, + sycl::ONEAPI::memory_order::relaxed, + sycl::ONEAPI::memory_scope::system, + sycl::access::address_space::global_space>(ct_acc[j]) += group_sum; + } + }); + }); + + e2.wait_and_throw(); + return 0; +} diff --git a/examples/cython/sycl_buffer/use_sycl_buffer.h b/examples/cython/sycl_buffer/use_sycl_buffer.h index 2c403879bb..f3ee924861 100644 --- a/examples/cython/sycl_buffer/use_sycl_buffer.h +++ b/examples/cython/sycl_buffer/use_sycl_buffer.h @@ -3,3 +3,5 @@ extern int c_columnwise_total( DPPLSyclQueueRef q, size_t n, size_t m, double *mat, double *ct); +extern int c_columnwise_total_no_mkl( + DPPLSyclQueueRef q, size_t n, size_t m, double *mat, double *ct); From 08d505980f8c814e36cd1074ea13042026558cc9 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 3 Dec 2020 14:11:24 -0600 Subject: [PATCH 4/6] few examples illustrating MemoryUSM* objects --- examples/python/usm_memory_allocation.py | 23 +++++++++++++++ examples/python/usm_memory_host_access.py | 36 +++++++++++++++++++++++ examples/python/usm_memory_operation.py | 30 +++++++++++++++++++ 3 files changed, 89 insertions(+) create mode 100644 examples/python/usm_memory_allocation.py create mode 100644 examples/python/usm_memory_host_access.py create mode 100644 examples/python/usm_memory_operation.py diff --git a/examples/python/usm_memory_allocation.py b/examples/python/usm_memory_allocation.py new file mode 100644 index 0000000000..f0ce53fd40 --- /dev/null +++ b/examples/python/usm_memory_allocation.py @@ -0,0 +1,23 @@ +import dpctl +import dpctl.memory as dpmem + +# allocate USM-shared byte-buffer +ms = dpmem.MemoryUSMShared(16) + +# allocate USM-device byte-buffer +md = dpmem.MemoryUSMDevice(16) + +# allocate USM-host byte-buffer +mh = dpmem.MemoryUSMHost(16) + +# specify alignment +mda = dpmem.MemoryUSMDevice(128, alignment=16) + +# allocate using given queue, +# i.e. on the device and bound to the context stored in the queue +mdq = dpmem.MemoryUSMDevice(256, queue=mda._queue) + +# information about device associate with USM buffer +print("Allocation performed on device:") +mda._queue.get_sycl_device().dump_device_info() + diff --git a/examples/python/usm_memory_host_access.py b/examples/python/usm_memory_host_access.py new file mode 100644 index 0000000000..f907b9c60d --- /dev/null +++ b/examples/python/usm_memory_host_access.py @@ -0,0 +1,36 @@ +import dpctl +import dpctl.memory as dpmem + +# USM-shared and USM-host pointers are host-accessible, +# meaning they are accessible from Python, therefore +# they implement Pyton buffer protocol + +# allocate 1K of USM-shared buffer +ms = dpmem.MemoryUSMShared(1024) + +# create memoryview into USM-shared buffer +msv = memoryview(ms) + +# populate buffer from host one byte at a type +for i in range(len(ms)): + ir = i % 256 + msv[i] = ir**2 % 256 + +mh = dpmem.MemoryUSMHost(64) +mhv = memoryview(mh) + +# copy content of block of USM-shared buffer to +# USM-host buffer +mhv[:] = msv[78:78+len(mh)] + +print("Byte-values of the USM-host buffer") +print(list(mhv)) + +# USM-device buffer is not host accessible +md = dpmem.MemoryUSMDevice(16) +try: + mdv = memoryview(md) +except Exception as e: + print("") + print("An expected exception was raised during attempted construction of memoryview from USM-device memory object.") + print("\t", e) diff --git a/examples/python/usm_memory_operation.py b/examples/python/usm_memory_operation.py new file mode 100644 index 0000000000..efdb7861fd --- /dev/null +++ b/examples/python/usm_memory_operation.py @@ -0,0 +1,30 @@ +import dpctl +import dpctl.memory as dpmem +import numpy as np + +ms = dpmem.MemoryUSMShared(32) +md = dpmem.MemoryUSMDevice(32) + +host_buf = np.random.randint(0, 42, dtype=np.uint8, size=32) + +# copy host byte-like object to USM-device buffer +md.copy_from_host(host_buf) + +# copy USM-device buffer to USM-shared buffer in parallel (using sycl::queue::memcpy) +ms.copy_from_device(md) + +# build numpy array reusing host-accessible USM-shared memory +X = np.ndarray((len(ms),), buffer=ms, dtype=np.uint8) + +# Display Python object NumPy ndarray is viewing into +print("numpy.ndarray.base: ", X.base) +print("") + +# Print content of the view +print("View..........: ", X) + +# Print content of the original host buffer +print("host_buf......: ", host_buf) + +# use copy_to_host to retrieve memory of USM-device memory +print("copy_to_host(): ", md.copy_to_host()) From 976725895d9f0957ed0abd7678e21130f2239414 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 3 Dec 2020 14:29:49 -0600 Subject: [PATCH 5/6] have it black's way --- examples/cython/sycl_buffer/bench.py | 58 +++++++------- examples/cython/sycl_buffer/run.py | 4 +- examples/cython/sycl_buffer/setup.py | 51 ++++++++----- examples/cython/sycl_direct_linkage/run.py | 1 - examples/cython/sycl_direct_linkage/setup.py | 51 ++++++++----- .../usm_memory/reference_black_scholes.py | 6 +- examples/cython/usm_memory/run.py | 76 +++++++++---------- examples/cython/usm_memory/setup.py | 51 ++++++++----- examples/python/create_sycl_queues.py | 2 +- examples/python/usm_memory_allocation.py | 1 - examples/python/usm_memory_host_access.py | 8 +- 11 files changed, 172 insertions(+), 137 deletions(-) diff --git a/examples/cython/sycl_buffer/bench.py b/examples/cython/sycl_buffer/bench.py index 7a148662a7..0c6d94d189 100644 --- a/examples/cython/sycl_buffer/bench.py +++ b/examples/cython/sycl_buffer/bench.py @@ -2,46 +2,50 @@ import syclbuffer as sb import numpy as np -X = np.full((10**4, 4098), 1e-4, dtype='d') +X = np.full((10 ** 4, 4098), 1e-4, dtype="d") # warm-up print("=" * 10 + " Executing warm-up " + "=" * 10) print("NumPy result: ", X.sum(axis=0)) -dpctl.set_default_queue('opencl', 'cpu', 0) -print("SYCL({}) result: {}".format( - dpctl.get_current_queue().get_sycl_device().get_device_name(), - sb.columnwise_total(X)) +dpctl.set_default_queue("opencl", "cpu", 0) +print( + "SYCL({}) result: {}".format( + dpctl.get_current_queue().get_sycl_device().get_device_name(), + sb.columnwise_total(X), + ) ) -dpctl.set_default_queue('opencl', 'gpu', 0) -print("SYCL({}) result: {}".format( - dpctl.get_current_queue().get_sycl_device().get_device_name(), - sb.columnwise_total(X)) +dpctl.set_default_queue("opencl", "gpu", 0) +print( + "SYCL({}) result: {}".format( + dpctl.get_current_queue().get_sycl_device().get_device_name(), + sb.columnwise_total(X), + ) ) import timeit print("Times for 'opencl:cpu:0'") -print(timeit.repeat( - stmt = 'sb.columnwise_total(X)', - setup = 'dpctl.set_default_queue("opencl", "cpu", 0); ' - 'sb.columnwise_total(X)', # ensure JIT compilation is not counted - number = 100, - globals=globals() -)) +print( + timeit.repeat( + stmt="sb.columnwise_total(X)", + setup='dpctl.set_default_queue("opencl", "cpu", 0); ' + "sb.columnwise_total(X)", # ensure JIT compilation is not counted + number=100, + globals=globals(), + ) +) print("Times for 'opencl:gpu:0'") -print(timeit.repeat( - stmt = 'sb.columnwise_total(X)', - setup = 'dpctl.set_default_queue("opencl", "gpu", 0); sb.columnwise_total(X)', - number = 100, - globals=globals() -)) +print( + timeit.repeat( + stmt="sb.columnwise_total(X)", + setup='dpctl.set_default_queue("opencl", "gpu", 0); sb.columnwise_total(X)', + number=100, + globals=globals(), + ) +) print("Times for NumPy") -print(timeit.repeat( - stmt = 'X.sum(axis=0)', - number=100, - globals=globals() -)) +print(timeit.repeat(stmt="X.sum(axis=0)", number=100, globals=globals())) diff --git a/examples/cython/sycl_buffer/run.py b/examples/cython/sycl_buffer/run.py index 4049e173c0..4e279a84e2 100644 --- a/examples/cython/sycl_buffer/run.py +++ b/examples/cython/sycl_buffer/run.py @@ -13,10 +13,10 @@ # controlling where to offload import dpctl -with dpctl.device_context('opencl:gpu'): +with dpctl.device_context("opencl:gpu"): print("Running on: ", dpctl.get_current_queue().get_sycl_device().get_device_name()) print(sb.columnwise_total(X)) -with dpctl.device_context('opencl:cpu'): +with dpctl.device_context("opencl:cpu"): print("Running on: ", dpctl.get_current_queue().get_sycl_device().get_device_name()) print(sb.columnwise_total(X)) diff --git a/examples/cython/sycl_buffer/setup.py b/examples/cython/sycl_buffer/setup.py index 57acd4781f..ef9b6f3b78 100644 --- a/examples/cython/sycl_buffer/setup.py +++ b/examples/cython/sycl_buffer/setup.py @@ -5,52 +5,63 @@ from Cython.Build import cythonize -def configuration(parent_package='', top_path=None): +def configuration(parent_package="", top_path=None): from numpy.distutils.misc_util import Configuration from numpy.distutils.system_info import get_info import numpy as np import dpctl - config = Configuration('', parent_package, top_path) + config = Configuration("", parent_package, top_path) - oneapi_root = environ.get('ONEAPI_ROOT', None) + oneapi_root = environ.get("ONEAPI_ROOT", None) if not oneapi_root: raise ValueError("ONEAPI_ROOT must be set, typical value is /opt/intel/oneapi") mkl_info = { - 'include_dirs': [join(oneapi_root, 'mkl', 'include')], - 'library_dirs': [join(oneapi_root, 'mkl', 'lib'), join(oneapi_root, 'mkl', 'lib', 'intel64')], - 'libraries': ['mkl_sycl', 'mkl_intel_ilp64', 'mkl_tbb_thread', 'mkl_core', 'tbb', 'iomp5'] + "include_dirs": [join(oneapi_root, "mkl", "include")], + "library_dirs": [ + join(oneapi_root, "mkl", "lib"), + join(oneapi_root, "mkl", "lib", "intel64"), + ], + "libraries": [ + "mkl_sycl", + "mkl_intel_ilp64", + "mkl_tbb_thread", + "mkl_core", + "tbb", + "iomp5", + ], } - mkl_include_dirs = mkl_info.get('include_dirs') - mkl_library_dirs = mkl_info.get('library_dirs') - mkl_libraries = mkl_info.get('libraries') + mkl_include_dirs = mkl_info.get("include_dirs") + mkl_library_dirs = mkl_info.get("library_dirs") + mkl_libraries = mkl_info.get("libraries") pdir = dirname(__file__) wdir = join(pdir) - eca = ['-Wall', '-Wextra', '-fsycl', '-fsycl-unnamed-lambda'] + eca = ["-Wall", "-Wextra", "-fsycl", "-fsycl-unnamed-lambda"] config.add_extension( - name='syclbuffer', + name="syclbuffer", sources=[ - join(pdir, '_buffer_example.pyx'), - join(wdir, 'use_sycl_buffer.cpp'), - join(wdir, 'use_sycl_buffer.h') - ], + join(pdir, "_buffer_example.pyx"), + join(wdir, "use_sycl_buffer.cpp"), + join(wdir, "use_sycl_buffer.h"), + ], include_dirs=[wdir, np.get_include(), dpctl.get_include()] + mkl_include_dirs, - libraries=['sycl'] + mkl_libraries, + libraries=["sycl"] + mkl_libraries, runtime_library_dirs=mkl_library_dirs, - extra_compile_args=eca, # + ['-O0', '-g', '-ggdb'], - extra_link_args=['-fPIC'], - language='c++' + extra_compile_args=eca, # + ['-O0', '-g', '-ggdb'], + extra_link_args=["-fPIC"], + language="c++", ) config.ext_modules = cythonize(config.ext_modules, include_path=[pdir, wdir]) return config -if __name__ == '__main__': +if __name__ == "__main__": from numpy.distutils.core import setup + setup(configuration=configuration) diff --git a/examples/cython/sycl_direct_linkage/run.py b/examples/cython/sycl_direct_linkage/run.py index c147ec056c..ed9597add1 100644 --- a/examples/cython/sycl_direct_linkage/run.py +++ b/examples/cython/sycl_direct_linkage/run.py @@ -8,4 +8,3 @@ # compute column-wise total with SYCL extension print(sb.columnwise_total(X)) - diff --git a/examples/cython/sycl_direct_linkage/setup.py b/examples/cython/sycl_direct_linkage/setup.py index ceee314a12..495838b1fd 100644 --- a/examples/cython/sycl_direct_linkage/setup.py +++ b/examples/cython/sycl_direct_linkage/setup.py @@ -5,52 +5,63 @@ from Cython.Build import cythonize -def configuration(parent_package='', top_path=None): +def configuration(parent_package="", top_path=None): from numpy.distutils.misc_util import Configuration from numpy.distutils.system_info import get_info import numpy as np import dpctl - config = Configuration('', parent_package, top_path) + config = Configuration("", parent_package, top_path) - oneapi_root = environ.get('ONEAPI_ROOT', None) + oneapi_root = environ.get("ONEAPI_ROOT", None) if not oneapi_root: raise ValueError("ONEAPI_ROOT must be set, typical value is /opt/intel/oneapi") mkl_info = { - 'include_dirs': [join(oneapi_root, 'mkl', 'include')], - 'library_dirs': [join(oneapi_root, 'mkl', 'lib'), join(oneapi_root, 'mkl', 'lib', 'intel64')], - 'libraries': ['mkl_sycl', 'mkl_intel_ilp64', 'mkl_tbb_thread', 'mkl_core', 'tbb', 'iomp5'] + "include_dirs": [join(oneapi_root, "mkl", "include")], + "library_dirs": [ + join(oneapi_root, "mkl", "lib"), + join(oneapi_root, "mkl", "lib", "intel64"), + ], + "libraries": [ + "mkl_sycl", + "mkl_intel_ilp64", + "mkl_tbb_thread", + "mkl_core", + "tbb", + "iomp5", + ], } - mkl_include_dirs = mkl_info.get('include_dirs') - mkl_library_dirs = mkl_info.get('library_dirs') - mkl_libraries = mkl_info.get('libraries') + mkl_include_dirs = mkl_info.get("include_dirs") + mkl_library_dirs = mkl_info.get("library_dirs") + mkl_libraries = mkl_info.get("libraries") pdir = dirname(__file__) wdir = join(pdir) - eca = ['-Wall', '-Wextra', '-fsycl', '-fsycl-unnamed-lambda'] + eca = ["-Wall", "-Wextra", "-fsycl", "-fsycl-unnamed-lambda"] config.add_extension( - name='syclbuffer_naive', + name="syclbuffer_naive", sources=[ - join(pdir, '_buffer_example.pyx'), - join(pdir, 'sycl_function.cpp'), - join(pdir, 'sycl_function.hpp') - ], + join(pdir, "_buffer_example.pyx"), + join(pdir, "sycl_function.cpp"), + join(pdir, "sycl_function.hpp"), + ], include_dirs=[wdir, np.get_include(), dpctl.get_include()] + mkl_include_dirs, - libraries=['sycl'] + mkl_libraries, + libraries=["sycl"] + mkl_libraries, runtime_library_dirs=mkl_library_dirs, - extra_compile_args=eca, # + ['-O0', '-g', '-ggdb'], - extra_link_args=['-fPIC'], - language='c++' + extra_compile_args=eca, # + ['-O0', '-g', '-ggdb'], + extra_link_args=["-fPIC"], + language="c++", ) config.ext_modules = cythonize(config.ext_modules, include_path=[pdir, wdir]) return config -if __name__ == '__main__': +if __name__ == "__main__": from numpy.distutils.core import setup + setup(configuration=configuration) diff --git a/examples/cython/usm_memory/reference_black_scholes.py b/examples/cython/usm_memory/reference_black_scholes.py index bc2ef8a91f..ae01312932 100644 --- a/examples/cython/usm_memory/reference_black_scholes.py +++ b/examples/cython/usm_memory/reference_black_scholes.py @@ -1,5 +1,6 @@ import math + def ref_python_black_scholes(price, strike, t, rate, vol): mr = -rate sig_sig_two = vol * vol * 2 @@ -20,19 +21,18 @@ def ref_python_black_scholes(price, strike, t, rate, vol): w1 = (a - b + c) * y w2 = (a - b - c) * y - if (w1 > 0): + if w1 > 0: d1 = 0.5 * math.erfc(-w1) d1c = 1.0 - d1 else: d1c = 0.5 * math.erfc(w1) d1 = 1.0 - d1c - if (w2 > 0): + if w2 > 0: d2 = 0.5 * math.erfc(-w2) d2c = 1.0 - d2 else: d2c = 0.5 * math.erfc(w2) d2 = 1.0 - d2c - call = P * d1 - Se * d2 put = Se * d2c - P * d1c diff --git a/examples/cython/usm_memory/run.py b/examples/cython/usm_memory/run.py index 5e36b9b0ef..422c4baaf1 100644 --- a/examples/cython/usm_memory/run.py +++ b/examples/cython/usm_memory/run.py @@ -4,75 +4,73 @@ import numpy as np, dpctl from reference_black_scholes import ref_python_black_scholes + def gen_option_params(n_opts, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, dtype): usm_mem = dpctl_mem.MemoryUSMShared(n_opts * 5 * np.dtype(dtype).itemsize) # usm_mem2 = dpctl_mem.MemoryUSMDevice(n_opts * 5 * np.dtype(dtype).itemsize) - params = np.ndarray(shape = (n_opts, 5), buffer=usm_mem, dtype=dtype) + params = np.ndarray(shape=(n_opts, 5), buffer=usm_mem, dtype=dtype) seed = 1234 - bs.populate_params( - params, - pl, ph, - sl, sh, - tl, th, - rl, rh, - vl, vh, - seed - ) + bs.populate_params(params, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, seed) return params + # ==== dry run === -usm_mem = dpctl_mem.MemoryUSMShared(3*5*np.dtype('d').itemsize) -opts = np.ndarray((3, 5), buffer=usm_mem, dtype='d') +usm_mem = dpctl_mem.MemoryUSMShared(3 * 5 * np.dtype("d").itemsize) +opts = np.ndarray((3, 5), buffer=usm_mem, dtype="d") # copy from Host NumPy to USM buffer -opts[:,:] = np.array([ - [81.2, 81.8, 29, 0.01, 0.02], - [24.24, 22.1, 10, 0.02, 0.08], - [100, 100, 30, 0.01, 0.12] -]) +opts[:, :] = np.array( + [ + [81.2, 81.8, 29, 0.01, 0.02], + [24.24, 22.1, 10, 0.02, 0.08], + [100, 100, 30, 0.01, 0.12], + ] +) # GPU computation Xgpu = bs.black_scholes_price(opts) # compute prices in CPython -X_ref = np.array([ref_python_black_scholes(*opt) for opt in opts], dtype='d') +X_ref = np.array([ref_python_black_scholes(*opt) for opt in opts], dtype="d") print(np.allclose(Xgpu, X_ref, atol=1e-5)) -n_opts = 3*10**6 +n_opts = 3 * 10 ** 6 # compute on CPU sycl device import timeit for _ in range(3): - dpctl.set_default_queue('opencl', 'cpu', 0) - print("Using : {}".format(dpctl.get_current_queue().get_sycl_device().get_device_name())) + dpctl.set_default_queue("opencl", "cpu", 0) + print( + "Using : {}".format( + dpctl.get_current_queue().get_sycl_device().get_device_name() + ) + ) t0 = timeit.default_timer() - opts1 = gen_option_params(n_opts, - 20.0, 30.0, - 22.0, 29.0, - 18.0, 24.0, - 0.01, 0.05, - 0.01, 0.05, 'd') + opts1 = gen_option_params( + n_opts, 20.0, 30.0, 22.0, 29.0, 18.0, 24.0, 0.01, 0.05, 0.01, 0.05, "d" + ) X1 = bs.black_scholes_price(opts1) t1 = timeit.default_timer() - print("Elapsed: {}".format(t1-t0)) + print("Elapsed: {}".format(t1 - t0)) # compute on GPU sycl device - dpctl.set_default_queue('level0', 'gpu', 0) - print("Using : {}".format(dpctl.get_current_queue().get_sycl_device().get_device_name())) + dpctl.set_default_queue("level0", "gpu", 0) + print( + "Using : {}".format( + dpctl.get_current_queue().get_sycl_device().get_device_name() + ) + ) t0 = timeit.default_timer() - opts2 = gen_option_params(n_opts, - 20.0, 30.0, - 22.0, 29.0, - 18.0, 24.0, - 0.01, 0.05, - 0.01, 0.05, 'd') + opts2 = gen_option_params( + n_opts, 20.0, 30.0, 22.0, 29.0, 18.0, 24.0, 0.01, 0.05, 0.01, 0.05, "d" + ) X2 = bs.black_scholes_price(opts2) t1 = timeit.default_timer() - print("Elapsed: {}".format(t1-t0)) + print("Elapsed: {}".format(t1 - t0)) -print(np.abs(opts1-opts2).max()) -print(np.abs(X2-X1).max()) +print(np.abs(opts1 - opts2).max()) +print(np.abs(X2 - X1).max()) diff --git a/examples/cython/usm_memory/setup.py b/examples/cython/usm_memory/setup.py index da71d08a7e..4f3fced830 100644 --- a/examples/cython/usm_memory/setup.py +++ b/examples/cython/usm_memory/setup.py @@ -5,52 +5,63 @@ from Cython.Build import cythonize -def configuration(parent_package='', top_path=None): +def configuration(parent_package="", top_path=None): from numpy.distutils.misc_util import Configuration from numpy.distutils.system_info import get_info import numpy as np import dpctl - config = Configuration('', parent_package, top_path) + config = Configuration("", parent_package, top_path) - oneapi_root = environ.get('ONEAPI_ROOT', None) + oneapi_root = environ.get("ONEAPI_ROOT", None) if not oneapi_root: raise ValueError("ONEAPI_ROOT must be set, typical value is /opt/intel/oneapi") mkl_info = { - 'include_dirs': [join(oneapi_root, 'mkl', 'include')], - 'library_dirs': [join(oneapi_root, 'mkl', 'lib'), join(oneapi_root, 'mkl', 'lib', 'intel64')], - 'libraries': ['mkl_sycl', 'mkl_intel_ilp64', 'mkl_tbb_thread', 'mkl_core', 'tbb', 'iomp5'] + "include_dirs": [join(oneapi_root, "mkl", "include")], + "library_dirs": [ + join(oneapi_root, "mkl", "lib"), + join(oneapi_root, "mkl", "lib", "intel64"), + ], + "libraries": [ + "mkl_sycl", + "mkl_intel_ilp64", + "mkl_tbb_thread", + "mkl_core", + "tbb", + "iomp5", + ], } - mkl_include_dirs = mkl_info.get('include_dirs') - mkl_library_dirs = mkl_info.get('library_dirs') - mkl_libraries = mkl_info.get('libraries') + mkl_include_dirs = mkl_info.get("include_dirs") + mkl_library_dirs = mkl_info.get("library_dirs") + mkl_libraries = mkl_info.get("libraries") pdir = dirname(__file__) wdir = join(pdir) - eca = ['-Wall', '-Wextra', '-fsycl', '-fsycl-unnamed-lambda'] + eca = ["-Wall", "-Wextra", "-fsycl", "-fsycl-unnamed-lambda"] config.add_extension( - name='blackscholes_usm', + name="blackscholes_usm", sources=[ - join(pdir, 'blackscholes.pyx'), - join(wdir, 'sycl_blackscholes.cpp'), - join(wdir, 'sycl_blackscholes.hpp') - ], + join(pdir, "blackscholes.pyx"), + join(wdir, "sycl_blackscholes.cpp"), + join(wdir, "sycl_blackscholes.hpp"), + ], include_dirs=[wdir, np.get_include(), dpctl.get_include()] + mkl_include_dirs, - libraries=['sycl'] + mkl_libraries, + libraries=["sycl"] + mkl_libraries, runtime_library_dirs=mkl_library_dirs, - extra_compile_args=eca, # + ['-O0', '-g', '-ggdb'], - extra_link_args=['-fPIC'], - language='c++' + extra_compile_args=eca, # + ['-O0', '-g', '-ggdb'], + extra_link_args=["-fPIC"], + language="c++", ) config.ext_modules = cythonize(config.ext_modules, include_path=[pdir, wdir]) return config -if __name__ == '__main__': +if __name__ == "__main__": from numpy.distutils.core import setup + setup(configuration=configuration) diff --git a/examples/python/create_sycl_queues.py b/examples/python/create_sycl_queues.py index bf343bae4c..bdf8368044 100644 --- a/examples/python/create_sycl_queues.py +++ b/examples/python/create_sycl_queues.py @@ -20,7 +20,7 @@ # the with device_context scope gets reset to what ever context was set # at entry of the scope. For this case, the context would go back to the # default context -with device_context('opencl:cpu:0') as cpu_queue: +with device_context("opencl:cpu:0") as cpu_queue: print("========================================") print("Current context inside with scope") print("========================================") diff --git a/examples/python/usm_memory_allocation.py b/examples/python/usm_memory_allocation.py index f0ce53fd40..82b989f50b 100644 --- a/examples/python/usm_memory_allocation.py +++ b/examples/python/usm_memory_allocation.py @@ -20,4 +20,3 @@ # information about device associate with USM buffer print("Allocation performed on device:") mda._queue.get_sycl_device().dump_device_info() - diff --git a/examples/python/usm_memory_host_access.py b/examples/python/usm_memory_host_access.py index f907b9c60d..c38807be20 100644 --- a/examples/python/usm_memory_host_access.py +++ b/examples/python/usm_memory_host_access.py @@ -14,14 +14,14 @@ # populate buffer from host one byte at a type for i in range(len(ms)): ir = i % 256 - msv[i] = ir**2 % 256 + msv[i] = ir ** 2 % 256 mh = dpmem.MemoryUSMHost(64) mhv = memoryview(mh) # copy content of block of USM-shared buffer to # USM-host buffer -mhv[:] = msv[78:78+len(mh)] +mhv[:] = msv[78 : 78 + len(mh)] print("Byte-values of the USM-host buffer") print(list(mhv)) @@ -32,5 +32,7 @@ mdv = memoryview(md) except Exception as e: print("") - print("An expected exception was raised during attempted construction of memoryview from USM-device memory object.") + print( + "An expected exception was raised during attempted construction of memoryview from USM-device memory object." + ) print("\t", e) From 4b49e94155774c3c7d96f504cb819087637aaa7f Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 3 Dec 2020 15:12:42 -0600 Subject: [PATCH 6/6] Adjusted examples section of global README.md --- README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4a09b2b7bc..e8ef93908b 100644 --- a/README.md +++ b/README.md @@ -76,11 +76,17 @@ Examples ======== See examples in folder `examples`. -Run examples: +Run python examples: ```bash -python examples/create_sycl_queues.py +for script in `ls examples/python/`; do echo "executing ${script}"; python examples/python/${script}; done ``` +Examples of building Cython extensions with DPC++ compiler, that interoperate with dpCtl can be found in +folder `cython`. + +Each example in `cython` folder can be built using `CC=clang CXX=dpcpp python setup.py build_ext --inplace`. +Please refer to `run.py` script in respective folders to execute extensions. + Tests ===== See tests in folder `dpctl/tests`.