From 30e3cc6ac1dbe96f32bfcac4b5ebfd1b81f89a67 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sun, 26 Sep 2021 12:44:21 -0500 Subject: [PATCH 1/2] Added new example of Python object exposing __sycl_usm_array_interface__ Native Pybind11-generated extension implemented Python type bound to DMatrix C++ class which allocates USM memory using sycl::usm_allocator. The Python object implements __sycl_usm_array_interface__, which allows dpctl.memory.as_usm_memory to create a view into that native USM allocation. The example.py modifies that memory from Python, and uses object's own .tolist() method to retrieve the memory using C++, demonstrating that values changed. Also added license missing headers to .cpp and .hpp files from other examples --- .../cython/sycl_buffer/use_sycl_buffer.cpp | 29 ++++ .../sycl_direct_linkage/sycl_function.cpp | 28 +++ .../cython/usm_memory/sycl_blackscholes.cpp | 28 +++ .../cython/usm_memory/sycl_blackscholes.hpp | 26 +++ .../external_usm_allocation/README.md | 29 ++++ .../_usm_alloc_example.cpp | 161 ++++++++++++++++++ .../external_usm_allocation/example.py | 52 ++++++ .../pybind11/external_usm_allocation/setup.py | 34 ++++ .../use_dpctl_syclqueue/pybind11_example.cpp | 30 ++++ 9 files changed, 417 insertions(+) create mode 100644 examples/pybind11/external_usm_allocation/README.md create mode 100644 examples/pybind11/external_usm_allocation/_usm_alloc_example.cpp create mode 100644 examples/pybind11/external_usm_allocation/example.py create mode 100644 examples/pybind11/external_usm_allocation/setup.py diff --git a/examples/cython/sycl_buffer/use_sycl_buffer.cpp b/examples/cython/sycl_buffer/use_sycl_buffer.cpp index ace6a594ba..6abae0e73b 100644 --- a/examples/cython/sycl_buffer/use_sycl_buffer.cpp +++ b/examples/cython/sycl_buffer/use_sycl_buffer.cpp @@ -1,3 +1,32 @@ +//=- use_sycl_buffer.cpp - Example of SYCL code to be called from Cython =// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2021 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements SYCL code to compute columnwise total of a matrix, +/// provided as host C-contiguous allocation. SYCL kernels access this memory +/// using `sycl::buffer`. Two routines are provided. One solves the task by +/// calling BLAS function GEMV from Intel(R) Math Kernel Library, the other +/// performs the computation using DPC++ reduction group function and atomics. +/// +//===----------------------------------------------------------------------===// + #include "use_sycl_buffer.h" #include "dpctl_sycl_types.h" #include diff --git a/examples/cython/sycl_direct_linkage/sycl_function.cpp b/examples/cython/sycl_direct_linkage/sycl_function.cpp index f38896adf0..3083b30872 100644 --- a/examples/cython/sycl_direct_linkage/sycl_function.cpp +++ b/examples/cython/sycl_direct_linkage/sycl_function.cpp @@ -1,3 +1,31 @@ +//=- use_sycl_buffer.cpp - Example of SYCL code to be called from Cython =// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2021 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements SYCL code to compute columnwise total of a matrix, +/// provided as host C-contiguous allocation. SYCL kernels access this memory +/// using `sycl::buffer`. The routine solves the task by calling BLAS function +// GEMV from Intel(R) Math Kernel Library. +/// +//===----------------------------------------------------------------------===// + #include "sycl_function.hpp" #include "mkl.h" #include diff --git a/examples/cython/usm_memory/sycl_blackscholes.cpp b/examples/cython/usm_memory/sycl_blackscholes.cpp index 798716ccaf..67406345b6 100644 --- a/examples/cython/usm_memory/sycl_blackscholes.cpp +++ b/examples/cython/usm_memory/sycl_blackscholes.cpp @@ -1,3 +1,31 @@ +//=- sycl_blackscholes.cpp - Example of SYCL code to be called from Cython =// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2021 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements SYCL code to price European vanilla options using +/// Black-Scholes formula, as well as code to generate option parameters using +/// SYCL device random number generation library from Intel(R) Math Kernel +/// Library. +/// +//===----------------------------------------------------------------------===// + #include "sycl_blackscholes.hpp" #include "dpctl_sycl_types.h" #include diff --git a/examples/cython/usm_memory/sycl_blackscholes.hpp b/examples/cython/usm_memory/sycl_blackscholes.hpp index 7a2d48e0b8..02181e2756 100644 --- a/examples/cython/usm_memory/sycl_blackscholes.hpp +++ b/examples/cython/usm_memory/sycl_blackscholes.hpp @@ -1,3 +1,29 @@ +//=- sycl_blackscholes.hpp - Example of SYCL code to be called from Cython =// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2021 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file exports C++ functions to be called from Cython-generated +/// extensions. +/// +//===----------------------------------------------------------------------===// + #include "dpctl_sycl_types.h" #include diff --git a/examples/pybind11/external_usm_allocation/README.md b/examples/pybind11/external_usm_allocation/README.md new file mode 100644 index 0000000000..7431ba4ab6 --- /dev/null +++ b/examples/pybind11/external_usm_allocation/README.md @@ -0,0 +1,29 @@ +# Exposing USM allocations made by native code to dpctl + +This extension demonstrates how a Python object backed by +a native class, which allocates USM memory, can expose it +to dpctl.memory entities using `__sycl_usm_array_interface__`. + + +# Building extension + +``` +source /opt/intel/oneapi/compiler/latest/env/vars.sh +CXX=dpcpp CC=dpcpp python setup.py build_ext --inplace +python example.py +``` + +# Sample output + +``` +(idp) [12:43:20 ansatnuc04 external_usm_allocation]$ python example.py + +{'data': [94846745444352, True], 'shape': (5, 5), 'strides': None, 'version': 1, 'typestr': '|f8', 'syclobj': } +shared + +[1.0, 1.0, 1.0, 2.0, 2.0] +[1.0, 0.0, 1.0, 2.0, 2.0] +[1.0, 1.0, 0.0, 2.0, 2.0] +[0.0, 0.0, 0.0, 3.0, -1.0] +[0.0, 0.0, 0.0, -1.0, 5.0] +``` diff --git a/examples/pybind11/external_usm_allocation/_usm_alloc_example.cpp b/examples/pybind11/external_usm_allocation/_usm_alloc_example.cpp new file mode 100644 index 0000000000..7a4e846f6d --- /dev/null +++ b/examples/pybind11/external_usm_allocation/_usm_alloc_example.cpp @@ -0,0 +1,161 @@ +//==- _usm_alloc_example.cpp - Example of Pybind11 extension exposing --===// +// native USM allocation to Python in such a way that dpctl.memory +// can form views into it. +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2021 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements Pybind11-generated extension that creates Python type +/// backed-up by C++ class DMatrix, which creates a USM allocation associated +/// with a given dpctl.SyclQueue. The Python object of this type implements +/// __sycl_usm_array_interface__, allowing dpctl.memory.as_usm_memory to form +/// a view into this allocation, and modify it from Python. +/// +/// The DMatrix type object also implements `.tolist()` method which copies +/// content of the object into list of lists of Python floats. +/// +//===----------------------------------------------------------------------===// +#include + +// clang-format off +#include "dpctl_sycl_types.h" +#include "../_sycl_queue.h" +#include "../_sycl_queue_api.h" +// clang-format on + +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" + +namespace py = pybind11; + +struct DMatrix +{ + using alloc_t = sycl::usm_allocator; + using vec_t = std::vector; + + DMatrix(sycl::queue &q, size_t rows, size_t columns) + : n_(rows), m_(columns), q_(q), alloc_(q), vec_(n_ * m_, alloc_) + { + } + ~DMatrix(){}; + DMatrix(const DMatrix &) = default; + DMatrix(DMatrix &&) = default; + + size_t get_n() const + { + return n_; + } + size_t get_m() const + { + return m_; + } + vec_t &get_vector() + { + return vec_; + } + sycl::queue get_queue() const + { + return q_; + } + + double get_element(size_t i, size_t j) + { + return vec_.at(i * m_ + j); + } + +private: + size_t n_; + size_t m_; + sycl::queue q_; + alloc_t alloc_; + vec_t vec_; +}; + +DMatrix create_matrix(py::object queue, size_t n, size_t m) +{ + PyObject *queue_ptr = queue.ptr(); + if (PyObject_TypeCheck(queue_ptr, &PySyclQueueType)) { + DPCTLSyclQueueRef QRef = + get_queue_ref(reinterpret_cast(queue_ptr)); + sycl::queue *q = reinterpret_cast(QRef); + + return DMatrix(*q, n, m); + } + else { + throw std::runtime_error("expected dpctl.SyclQueue as argument"); + } +} + +py::dict construct_sua_iface(DMatrix &m) +{ + // need "version", "data", "shape", "typestr", "syclobj" + py::tuple shape = py::make_tuple(m.get_n(), m.get_m()); + py::list data_entry(2); + data_entry[0] = reinterpret_cast(m.get_vector().data()); + data_entry[1] = true; + auto syclobj = py::capsule( + reinterpret_cast(new sycl::queue(m.get_queue())), + "SyclQueueRef", [](PyObject *cap) { + if (cap) { + auto name = PyCapsule_GetName(cap); + std::string name_s(name); + if (name_s == "SyclQueueRef" or name_s == "used_SyclQueueRef") { + void *p = PyCapsule_GetPointer(cap, name); + delete reinterpret_cast(p); + } + } + }); + py::dict iface; + iface["data"] = data_entry; + iface["shape"] = shape; + iface["strides"] = py::none(); + iface["version"] = 1; + iface["typestr"] = "|f8"; + iface["syclobj"] = syclobj; + + return iface; +} + +py::list tolist(DMatrix &m) +{ + size_t rows_count = m.get_n(); + size_t cols_count = m.get_m(); + py::list rows(rows_count); + for (size_t i = 0; i < rows_count; ++i) { + py::list row_i(cols_count); + for (size_t j = 0; j < cols_count; ++j) { + row_i[j] = m.get_element(i, j); + } + rows[i] = row_i; + } + return rows; +} + +PYBIND11_MODULE(external_usm_alloc, m) +{ + // Import the dpctl._sycl_queue extension + import_dpctl___sycl_queue(); + + py::class_ dm(m, "DMatrix"); + dm.def(py::init(&create_matrix), + "DMatrix(dpctl.SyclQueue, n_rows, n_cols)"); + dm.def_property("__sycl_usm_array_interface__", &construct_sua_iface, + nullptr); + dm.def("tolist", &tolist, "Return matrix a Python list of lists"); +} diff --git a/examples/pybind11/external_usm_allocation/example.py b/examples/pybind11/external_usm_allocation/example.py new file mode 100644 index 0000000000..522f822a36 --- /dev/null +++ b/examples/pybind11/external_usm_allocation/example.py @@ -0,0 +1,52 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: utf-8 + +import external_usm_alloc as eua +import numpy as np + +import dpctl +import dpctl.memory as dpm + +q = dpctl.SyclQueue("gpu") +matr = eua.DMatrix(q, 5, 5) + +print(matr) +print(matr.__sycl_usm_array_interface__) + +blob = dpm.as_usm_memory(matr) + +print(blob.get_usm_type()) + +Xh = np.array( + [ + [1, 1, 1, 2, 2], + [1, 0, 1, 2, 2], + [1, 1, 0, 2, 2], + [0, 0, 0, 3, -1], + [0, 0, 0, -1, 5], + ], + dtype="d", +) +host_bytes_view = Xh.reshape((-1)).view(np.ubyte) + +blob.copy_from_host(host_bytes_view) + +print("") +list_of_lists = matr.tolist() +for row in list_of_lists: + print(row) diff --git a/examples/pybind11/external_usm_allocation/setup.py b/examples/pybind11/external_usm_allocation/setup.py new file mode 100644 index 0000000000..2802075d41 --- /dev/null +++ b/examples/pybind11/external_usm_allocation/setup.py @@ -0,0 +1,34 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from pybind11.setup_helpers import Pybind11Extension +from setuptools import setup + +import dpctl + +ext_modules = [ + Pybind11Extension( + "external_usm_alloc", + ["./_usm_alloc_example.cpp"], + include_dirs=[dpctl.get_include()], + extra_compile_args=["-fPIC"], + extra_link_args=["-fPIC"], + libraries=["sycl"], + language="c++", + ) +] + +setup(name="external_usm_alloc", ext_modules=ext_modules) diff --git a/examples/pybind11/use_dpctl_syclqueue/pybind11_example.cpp b/examples/pybind11/use_dpctl_syclqueue/pybind11_example.cpp index 9f1cc19ee2..b90697a4f8 100644 --- a/examples/pybind11/use_dpctl_syclqueue/pybind11_example.cpp +++ b/examples/pybind11/use_dpctl_syclqueue/pybind11_example.cpp @@ -1,9 +1,39 @@ +//==- pybind11_example.cpp - Example of Pybind11 extension working with -===// +// dpctl Python objects. +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2021 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements Pybind11-generated extension exposing functions that +/// take dpctl Python objects, such as dpctl.SyclQueue, dpctl.SyclDevice as +/// arguments. +/// +//===----------------------------------------------------------------------===// + #include #include #include #include // clang-format off +// Ordering of includes is important here. dpctl_sycl_types defines types +// used by dpctl's Python C-API headers. #include "dpctl_sycl_types.h" #include "../_sycl_queue.h" #include "../_sycl_queue_api.h" From a8fa412b90bf20260dc473f901eee25aef61dd25 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 28 Sep 2021 13:44:25 -0500 Subject: [PATCH 2/2] Closes #585 Adds `#include ` conditionally for C compilers. Then the cython generate C source file can be compiled using gcc 9.3 as follows: ``` gcc -fmax-errors=1 a.c -fPIC -I$(python -c "import dpctl; print(dpctl.get_include())") -I$(python3-config --includes) $(pyt hon3-config --ldflags) -fno-lto -shared -oa$(python3-config --extension-suffix) ``` --- dpctl-capi/include/dpctl_data_types.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dpctl-capi/include/dpctl_data_types.h b/dpctl-capi/include/dpctl_data_types.h index 21c2a70744..58001fd24d 100644 --- a/dpctl-capi/include/dpctl_data_types.h +++ b/dpctl-capi/include/dpctl_data_types.h @@ -37,6 +37,10 @@ #include #include +#ifndef __cplusplus +#include +#endif + #ifndef _MSC_VER #if !defined(UINT32_MAX)