From ae8a3b1539447c8dda671249809bbcaf82cda7e8 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Fri, 10 Jan 2025 12:15:00 -0600 Subject: [PATCH 1/3] Use --offload-compress linker option to compress offload sections See https://www.intel.com/content/www/us/en/developer/articles/technical/sycl-compilation-device-image-compression.html It is applicable to any SYCL target. This change results in a 28.4% reduction in the size of shared objects with offload sections on Linux. --- dpctl/CMakeLists.txt | 2 ++ dpctl/tensor/CMakeLists.txt | 4 ++-- dpctl/utils/CMakeLists.txt | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/dpctl/CMakeLists.txt b/dpctl/CMakeLists.txt index 781944ea0c..ed27157938 100644 --- a/dpctl/CMakeLists.txt +++ b/dpctl/CMakeLists.txt @@ -111,6 +111,8 @@ function(build_dpctl_ext _trgt _src _dest) Python_add_library(${_trgt} MODULE WITH_SOABI ${_generated_src}) if (BUILD_DPCTL_EXT_SYCL) add_sycl_to_target(TARGET ${_trgt} SOURCES ${_generated_src}) + target_compile_options(${_trgt} PRIVATE -fno-sycl-id-queries-fit-in-int) + target_link_options(${_trgt} PRIVATE -fsycl-device-code-split=per_kernel --offload-compress) if(_dpctl_sycl_targets) # make fat binary target_compile_options( diff --git a/dpctl/tensor/CMakeLists.txt b/dpctl/tensor/CMakeLists.txt index e7d3896680..11e0f64b56 100644 --- a/dpctl/tensor/CMakeLists.txt +++ b/dpctl/tensor/CMakeLists.txt @@ -267,7 +267,7 @@ endforeach() set(_linker_options "LINKER:${DPCTL_LDFLAGS}") foreach(python_module_name ${_py_trgts}) target_compile_options(${python_module_name} PRIVATE -fno-sycl-id-queries-fit-in-int) - target_link_options(${python_module_name} PRIVATE -fsycl-device-code-split=per_kernel) + target_link_options(${python_module_name} PRIVATE -fsycl-device-code-split=per_kernel --offload-compress) target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/include @@ -279,7 +279,7 @@ 
foreach(python_module_name ${_py_trgts}) target_compile_options(${python_module_name} PRIVATE -fprofile-instr-generate -fcoverage-mapping ) - endif() + endif() target_link_options(${python_module_name} PRIVATE -fprofile-instr-generate -fcoverage-mapping ) diff --git a/dpctl/utils/CMakeLists.txt b/dpctl/utils/CMakeLists.txt index 6d785c8203..51910ed352 100644 --- a/dpctl/utils/CMakeLists.txt +++ b/dpctl/utils/CMakeLists.txt @@ -28,7 +28,7 @@ list(APPEND _pybind11_targets ${python_module_name}) set(_linker_options "LINKER:${DPCTL_LDFLAGS}") foreach(python_module_name ${_pybind11_targets}) target_compile_options(${python_module_name} PRIVATE -fno-sycl-id-queries-fit-in-int) - target_link_options(${python_module_name} PRIVATE -fsycl-device-code-split=per_kernel) + target_link_options(${python_module_name} PRIVATE -fsycl-device-code-split=per_kernel --offload-compress) target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/include @@ -40,7 +40,7 @@ foreach(python_module_name ${_pybind11_targets}) target_compile_options(${python_module_name} PRIVATE -fprofile-instr-generate -fcoverage-mapping ) - endif() + endif() target_link_options(${python_module_name} PRIVATE -fprofile-instr-generate -fcoverage-mapping ) From 06c5cd4e11441f381adb2a9c68ee3b6543f1fb28 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Mon, 13 Jan 2025 12:49:46 -0600 Subject: [PATCH 2/3] Add DPCTL_OFFLOAD_COMPRESS:BOOL=OFF option to dpctl CMake script If -DDPCTL_OFFLOAD_COMPRESS=ON is set, the DPC++ link-time option `--offload-compress` is passed to compress offload sections. The option is OFF by default. 
--- CMakeLists.txt | 1 + dpctl/CMakeLists.txt | 5 ++++- dpctl/tensor/CMakeLists.txt | 6 +++++- dpctl/utils/CMakeLists.txt | 6 +++++- 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f265504636..29535b4a62 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,6 +35,7 @@ set(DPCTL_TARGET_HIP "Build DPCTL to target a HIP device architecture" ) option(DPCTL_WITH_REDIST "Build DPCTL assuming DPC++ redistributable is installed into Python prefix" OFF) +option(DPCTL_OFFLOAD_COMPRESS "Build using offload section compression feature of DPC++" OFF) find_package(IntelSYCL REQUIRED PATHS ${CMAKE_SOURCE_DIR}/cmake NO_DEFAULT_PATH) diff --git a/dpctl/CMakeLists.txt b/dpctl/CMakeLists.txt index ed27157938..1de0bbf77d 100644 --- a/dpctl/CMakeLists.txt +++ b/dpctl/CMakeLists.txt @@ -112,7 +112,10 @@ function(build_dpctl_ext _trgt _src _dest) if (BUILD_DPCTL_EXT_SYCL) add_sycl_to_target(TARGET ${_trgt} SOURCES ${_generated_src}) target_compile_options(${_trgt} PRIVATE -fno-sycl-id-queries-fit-in-int) - target_link_options(${_trgt} PRIVATE -fsycl-device-code-split=per_kernel --offload-compress) + target_link_options(${_trgt} PRIVATE -fsycl-device-code-split=per_kernel) + if (DPCTL_OFFLOAD_COMPRESS) + target_link_options(${_trgt} PRIVATE --offload-compress) + endif() if(_dpctl_sycl_targets) # make fat binary target_compile_options( diff --git a/dpctl/tensor/CMakeLists.txt b/dpctl/tensor/CMakeLists.txt index 11e0f64b56..9c53bd8b08 100644 --- a/dpctl/tensor/CMakeLists.txt +++ b/dpctl/tensor/CMakeLists.txt @@ -267,7 +267,11 @@ endforeach() set(_linker_options "LINKER:${DPCTL_LDFLAGS}") foreach(python_module_name ${_py_trgts}) target_compile_options(${python_module_name} PRIVATE -fno-sycl-id-queries-fit-in-int) - target_link_options(${python_module_name} PRIVATE -fsycl-device-code-split=per_kernel --offload-compress) + target_link_options(${python_module_name} PRIVATE -fsycl-device-code-split=per_kernel) + if (DPCTL_OFFLOAD_COMPRESS) + 
target_link_options(${python_module_name} PRIVATE --offload-compress) + endif() + target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/include diff --git a/dpctl/utils/CMakeLists.txt b/dpctl/utils/CMakeLists.txt index 51910ed352..a684983c74 100644 --- a/dpctl/utils/CMakeLists.txt +++ b/dpctl/utils/CMakeLists.txt @@ -28,7 +28,11 @@ list(APPEND _pybind11_targets ${python_module_name}) set(_linker_options "LINKER:${DPCTL_LDFLAGS}") foreach(python_module_name ${_pybind11_targets}) target_compile_options(${python_module_name} PRIVATE -fno-sycl-id-queries-fit-in-int) - target_link_options(${python_module_name} PRIVATE -fsycl-device-code-split=per_kernel --offload-compress) + target_link_options(${python_module_name} PRIVATE -fsycl-device-code-split=per_kernel) + if (DPCTL_OFFLOAD_COMPRESS) + target_link_options(${python_module_name} PRIVATE --offload-compress) + endif() + target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/include From 8b7a79b6eb8815cb4484af61b1c856a2ce22467a Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com> Date: Tue, 14 Jan 2025 05:24:35 -0600 Subject: [PATCH 3/3] Expanded description of DPCTL_OFFLOAD_COMPRESS option ``` $ cmake . 
-LAH -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx | grep -a2 DPCTL_OFFLOAD_COMPRESS // Build using offload section compression feature of DPC++ to reduce size of shared object with offloading sections DPCTL_OFFLOAD_COMPRESS:BOOL=OFF // Build DPCTL to target CUDA devices ``` --- CMakeLists.txt | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 29535b4a62..4a446c19a9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,8 +34,16 @@ set(DPCTL_TARGET_HIP CACHE STRING "Build DPCTL to target a HIP device architecture" ) -option(DPCTL_WITH_REDIST "Build DPCTL assuming DPC++ redistributable is installed into Python prefix" OFF) -option(DPCTL_OFFLOAD_COMPRESS "Build using offload section compression feature of DPC++" OFF) +option( + DPCTL_WITH_REDIST + "Build DPCTL assuming DPC++ redistributable is installed into Python prefix" + OFF) +option( + DPCTL_OFFLOAD_COMPRESS + "Build using offload section compression feature of DPC++ to reduce \ +size of shared object with offloading sections" + OFF +) find_package(IntelSYCL REQUIRED PATHS ${CMAKE_SOURCE_DIR}/cmake NO_DEFAULT_PATH)