From 701c05bc3548687024863c7408ee745af0c61549 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk
Date: Mon, 17 Jul 2023 08:37:59 -0500
Subject: [PATCH] No need to call _copy_overlapping if src and dst address same memory

```
In [1]: import dpctl.tensor as dpt, dpctl, dpctl.utils
In [2]: n, m = 8 * 540, 8 * 960
In [3]: a = dpt.ones((m, n))
In [4]: b = dpt.zeros((m, n))
In [5]: b_s = dpt.zeros((m, n+2))
In [6]: with dpctl.utils.onetrace_enabled():
   ...:     b_s[:,:-2] += a
   ...:
Device Timeline (queue: 0x556080b9cea0): zeCommandListAppendMemoryCopy(H2D)[48 bytes]<4.1> [ns] = 16946404661 (append) 16952292497 (submit) 16952613747 (start) 16952623538 (end)
Device Timeline (queue: 0x556080b9cea0): dpctl::tensor::kernels::add::add_inplace_strided_kernel[SIMD32 {64800; 1; 1} {512; 1; 1}]<5.1> [ns] = 17017855801 (append) 17018342202 (submit) 17019138920 (start) 17030770482 (end)
```

Earlier, two more copy operations were being performed as well.
---
 dpctl/tensor/_copy_utils.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/dpctl/tensor/_copy_utils.py b/dpctl/tensor/_copy_utils.py
index c220b61b26..63aca6ad06 100644
--- a/dpctl/tensor/_copy_utils.py
+++ b/dpctl/tensor/_copy_utils.py
@@ -213,6 +213,11 @@ def _copy_same_shape(dst, src):
     """Assumes src and dst have the same shape."""
     # check that memory regions do not overlap
     if ti._array_overlap(dst, src):
+        if src._pointer == dst._pointer and (
+            src is dst
+            or (src.strides == dst.strides and src.dtype == dst.dtype)
+        ):
+            return
         _copy_overlapping(src=src, dst=dst)
         return
 