From 7cf8a8c1b8f3a4f8d498f45e002f6ce6e2943f0e Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 24 Mar 2022 14:35:44 -0500 Subject: [PATCH] -fsycl-device-code-split=per_kernel must be passed to linker per compiler team Now, invoking copy kernels for different types triggers JIT-ting, as evidenced by ``` (idp_2022) [14:54:46 ansatnuc04 dpctl]$ SYCL_PI_TRACE=-1 python -c "import dpctl.tensor as dpt; print(dpt.__file__); X = dpt.empty(10, 'd'); Y = dpt.empty(10, 'i4'); X[:] = Y; X1 = dpt.empty(10,'f4'); X1[:] = Y" >& pi_trace.txt (idp_2022) [14:54:55 ansatnuc04 dpctl]$ grep -c piProgramCreate pi_trace.txt 2 ``` Previously the output was 1 (all 14*14 kernels were compiled all at once which was slower) on the first invocation. --- dpctl/tensor/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpctl/tensor/CMakeLists.txt b/dpctl/tensor/CMakeLists.txt index bab49e8d5c..23a99304d8 100644 --- a/dpctl/tensor/CMakeLists.txt +++ b/dpctl/tensor/CMakeLists.txt @@ -19,7 +19,7 @@ set(python_module_name _tensor_impl) pybind11_add_module(${python_module_name} MODULE ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/tensor_py.cpp ) -target_compile_options(${python_module_name} PRIVATE -fsycl-device-code-split=per_kernel) +target_link_options(${python_module_name} PRIVATE -fsycl-device-code-split=per_kernel) target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../include