From 2db04e66739a17d872eeb8abd1b3c18158986d2c Mon Sep 17 00:00:00 2001 From: Vahid Tavanashad Date: Thu, 8 May 2025 17:53:29 -0700 Subject: [PATCH 1/4] add new complex vm functions --- CHANGELOG.md | 3 +- mkl_umath/src/mkl_umath_loops.c.src | 70 ++++++++++++++--------------- 2 files changed, 37 insertions(+), 36 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 91e5da6..ff78768 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,8 +8,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added * Added mkl implementation for floating point data-types of `exp2`, `log2`, `fabs`, `copysign`, `nextafter`, `fmax`, `fmin` and `remainder` functions [gh-81](https://github.com/IntelPython/mkl_umath/pull/81) +* Added mkl implementation for complex data-types of `conjugate` and `absolute` functions [gh-86](https://github.com/IntelPython/mkl_umath/pull/86) -## [0.2.0] (06/DD/2025) +## [0.2.0] (06/03/2025) This release updates `mkl_umath` to be aligned with both numpy-1.26.x and numpy-2.x.x. ### Added diff --git a/mkl_umath/src/mkl_umath_loops.c.src b/mkl_umath/src/mkl_umath_loops.c.src index 6bee4b2..9fbc40a 100644 --- a/mkl_umath/src/mkl_umath_loops.c.src +++ b/mkl_umath/src/mkl_umath_loops.c.src @@ -129,6 +129,7 @@ * when these conditions are not met VML functions may produce incorrect output */ #define DISJOINT_OR_SAME(p1, p2, n, s) (((p1) == (p2)) || ((p2) + (n)*(s) < (p1)) || ((p1) + (n)*(s) < (p2)) ) +#define DISJOINT_OR_SAME_TWO_DTYPES(p1, p2, n, s1, s2) (((p1) == (p2)) || ((p2) + (n)*(s2) < (p1)) || ((p1) + (n)*(s1) < (p2)) ) /* * include vectorized functions and dispatchers @@ -316,8 +317,7 @@ mkl_umath_@TYPE@_exp(char **args, const npy_intp *dimensions, const npy_intp *st can_vectorize , const @type@ in1 = *(@type@ *)ip1; - const int invalid_cases = npy_isnan(in1) || in1 == NPY_INFINITY || in1 == -NPY_INFINITY; - ignore_fpstatus |= (invalid_cases ? 1 : 0); + ignore_fpstatus = npy_isnan(in1) || in1 == NPY_INFINITY || in1 == -NPY_INFINITY; *(@type@ *)op1 = @scalarf@(in1); ) } @@ -355,8 +355,7 @@ mkl_umath_@TYPE@_exp2(char **args, const npy_intp *dimensions, const npy_intp *s can_vectorize , const @type@ in1 = *(@type@ *)ip1; - const int invalid_cases = npy_isnan(in1) || in1 == NPY_INFINITY || in1 == -NPY_INFINITY; - ignore_fpstatus |= (invalid_cases ? 1 : 0); + ignore_fpstatus = npy_isnan(in1) || in1 == NPY_INFINITY || in1 == -NPY_INFINITY; *(@type@ *)op1 = @scalarf@(in1); ) } @@ -493,8 +492,7 @@ mkl_umath_@TYPE@_log2(char **args, const npy_intp *dimensions, const npy_intp *s can_vectorize , const @type@ in1 = *(@type@ *)ip1; - const int invalid_cases = in1 < 0 || in1 == 0 || npy_isnan(in1) || in1 == -NPY_INFINITY; - ignore_fpstatus |= (invalid_cases ? 1 : 0); + ignore_fpstatus = in1 < 0 || in1 == 0 || npy_isnan(in1) || in1 == -NPY_INFINITY; *(@type@ *)op1 = @scalarf@(in1); ) } @@ -2124,10 +2122,9 @@ mkl_umath_@TYPE@_remainder(char **args, const npy_intp *dimensions, const npy_in BINARY_LOOP { const @type@ in1 = *(@type@ *)ip1; const @type@ in2 = *(@type@ *)ip2; - int invalid_cases = !npy_isnan(in1) && in2 == 0; - invalid_cases |= (in1 == NPY_INFINITY || in1 == -NPY_INFINITY) && !npy_isnan(in2); - invalid_cases |= (in1 != NPY_INFINITY && in1 != -NPY_INFINITY) && (in2 == NPY_INFINITY || in2 == -NPY_INFINITY); - ignore_fpstatus |= (invalid_cases ? 1 : 0); + ignore_fpstatus = !npy_isnan(in1) && in2 == 0; + ignore_fpstatus |= (in1 == NPY_INFINITY || in1 == -NPY_INFINITY) && !npy_isnan(in2); + ignore_fpstatus |= (in1 != NPY_INFINITY && in1 != -NPY_INFINITY) && (in2 == NPY_INFINITY || in2 == -NPY_INFINITY); divmod@c@(in1, in2, (@type@ *)op1); } } @@ -2376,10 +2373,10 @@ mkl_umath_@TYPE@_ldexp_long(char **args, const npy_intp *dimensions, const npy_i * complex types * #TYPE = CFLOAT, CDOUBLE# * #ftype = npy_float, npy_double# + * #type = npy_cfloat, npy_cdouble# * #c = f, # - * #C = F, # - * #s = s, d# - * #SUPPORTED_BY_VML = 1, 1# + * #C = F, # + * #s = c, z# */ /* similar to pairwise sum of real floats */ @@ -2659,44 +2656,47 @@ mkl_umath_@TYPE@__ones_like(char **args, const npy_intp *dimensions, const npy_i } } -/* TODO: USE MKL */ void mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { - UNARY_LOOP { - const @ftype@ in1r = ((@ftype@ *)ip1)[0]; - const @ftype@ in1i = ((@ftype@ *)ip1)[1]; - ((@ftype@ *)op1)[0] = in1r; - ((@ftype@ *)op1)[1] = -in1i; - } + const int contig = IS_UNARY_CONT(@type@, @type@); + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); + const int can_vectorize = contig && disjoint_or_same; + + if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) { + CHUNKED_VML_CALL2(v@s@Conj, dimensions[0], @type@, args[0], args[1]); + /* v@s@Conj(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ + } else { + UNARY_LOOP { + const @ftype@ in1r = ((@ftype@ *)ip1)[0]; + const @ftype@ in1i = ((@ftype@ *)ip1)[1]; + ((@ftype@ *)op1)[0] = in1r; + ((@ftype@ *)op1)[1] = -in1i; + } + } } -/* TODO: USE MKL */ void mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) { + const int contig = IS_UNARY_CONT(@type@, @ftype@); + const int disjoint_or_same = DISJOINT_OR_SAME_TWO_DTYPES(args[0], args[1], dimensions[0], sizeof(@type@), sizeof(@ftype@)); + const int can_vectorize = contig && disjoint_or_same; int ignore_fpstatus = 0; - - // FIXME: abs function VML for complex numbers breaks FFT test_basic.py - //if(steps[0]/2 == sizeof(@ftype@) && steps[1] == sizeof(@ftype@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) { -#if @SUPPORTED_BY_VML@ - if(0 == 1) { + + if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) { ignore_fpstatus = 1; - CHUNKED_VML_CALL2(v@s@Abs, dimensions[0], @ftype@, args[0], args[1]); - /* v@s@Abs(dimensions[0], (@ftype@ *) args[0], (@ftype@ *) args[1]); */ - } else -#endif - { + CHUNKED_VML_CALL2(v@s@Abs, dimensions[0], @type@, args[0], args[1]); + /* v@s@Abs(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ + } else { UNARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; const @ftype@ in1i = ((@ftype@ *)ip1)[1]; - if(in1r == 0.0 && in1i == 0.0){ - ignore_fpstatus = 1; - } + ignore_fpstatus = npy_isnan(in1r) && npy_isnan(in1i); *((@ftype@ *)op1) = hypot@c@(in1r, in1i); } } if(ignore_fpstatus) { - feclearexcept(FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW | FE_INVALID); + feclearexcept(FE_INVALID); } } From 110f02f19eadeef1ce8b221dedda8ff3cba5823d Mon Sep 17 00:00:00 2001 From: Vahid Tavanashad Date: Mon, 14 Jul 2025 08:48:34 -0700 Subject: [PATCH 2/4] address comments --- mkl_umath/src/mkl_umath_loops.c.src | 31 ++++++++++++++++------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/mkl_umath/src/mkl_umath_loops.c.src b/mkl_umath/src/mkl_umath_loops.c.src index 9fbc40a..2f2d8a9 100644 --- a/mkl_umath/src/mkl_umath_loops.c.src +++ b/mkl_umath/src/mkl_umath_loops.c.src @@ -317,7 +317,8 @@ mkl_umath_@TYPE@_exp(char **args, const npy_intp *dimensions, const npy_intp *st can_vectorize , const @type@ in1 = *(@type@ *)ip1; - ignore_fpstatus = npy_isnan(in1) || in1 == NPY_INFINITY || in1 == -NPY_INFINITY; + const int invalid_cases = npy_isnan(in1) || in1 == NPY_INFINITY || in1 == -NPY_INFINITY; + ignore_fpstatus |= invalid_cases; *(@type@ *)op1 = @scalarf@(in1); ) } @@ -355,7 +356,8 @@ mkl_umath_@TYPE@_exp2(char **args, const npy_intp *dimensions, const npy_intp *s can_vectorize , const @type@ in1 = *(@type@ *)ip1; - ignore_fpstatus = npy_isnan(in1) || in1 == NPY_INFINITY || in1 == -NPY_INFINITY; + const int invalid_cases = npy_isnan(in1) || in1 == NPY_INFINITY || in1 == -NPY_INFINITY; + ignore_fpstatus |= invalid_cases; *(@type@ *)op1 = @scalarf@(in1); ) } @@ -492,7 +494,8 @@ mkl_umath_@TYPE@_log2(char **args, const npy_intp *dimensions, const npy_intp *s can_vectorize , const @type@ in1 = *(@type@ *)ip1; - ignore_fpstatus = in1 < 0 || in1 == 0 || npy_isnan(in1) || in1 == -NPY_INFINITY; + const int invalid_cases = in1 < 0 || in1 == 0 || npy_isnan(in1) || in1 == -NPY_INFINITY; + ignore_fpstatus |= invalid_cases; *(@type@ *)op1 = @scalarf@(in1); ) } @@ -2122,9 +2125,10 @@ mkl_umath_@TYPE@_remainder(char **args, const npy_intp *dimensions, const npy_in BINARY_LOOP { const @type@ in1 = *(@type@ *)ip1; const @type@ in2 = *(@type@ *)ip2; - ignore_fpstatus = !npy_isnan(in1) && in2 == 0; - ignore_fpstatus |= (in1 == NPY_INFINITY || in1 == -NPY_INFINITY) && !npy_isnan(in2); - ignore_fpstatus |= (in1 != NPY_INFINITY && in1 != -NPY_INFINITY) && (in2 == NPY_INFINITY || in2 == -NPY_INFINITY); + int invalid_cases = !npy_isnan(in1) && in2 == 0; + invalid_cases |= (in1 == NPY_INFINITY || in1 == -NPY_INFINITY) && !npy_isnan(in2); + invalid_cases |= (in1 != NPY_INFINITY && in1 != -NPY_INFINITY) && (in2 == NPY_INFINITY || in2 == -NPY_INFINITY); + ignore_fpstatus |= invalid_cases; divmod@c@(in1, in2, (@type@ *)op1); } } @@ -2666,13 +2670,13 @@ mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_in CHUNKED_VML_CALL2(v@s@Conj, dimensions[0], @type@, args[0], args[1]); /* v@s@Conj(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ } else { - UNARY_LOOP { - const @ftype@ in1r = ((@ftype@ *)ip1)[0]; - const @ftype@ in1i = ((@ftype@ *)ip1)[1]; - ((@ftype@ *)op1)[0] = in1r; - ((@ftype@ *)op1)[1] = -in1i; - } - } + UNARY_LOOP { + const @ftype@ in1r = ((@ftype@ *)ip1)[0]; + const @ftype@ in1i = ((@ftype@ *)ip1)[1]; + ((@ftype@ *)op1)[0] = in1r; + ((@ftype@ *)op1)[1] = -in1i; + } + } } void @@ -2691,7 +2695,6 @@ mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_int UNARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; const @ftype@ in1i = ((@ftype@ *)ip1)[1]; - ignore_fpstatus = npy_isnan(in1r) && npy_isnan(in1i); *((@ftype@ *)op1) = hypot@c@(in1r, in1i); } } From 9bff9c115bf575479f900797ffbb87cd03e1dce8 Mon Sep 17 00:00:00 2001 From: Vahid Tavanashad Date: Mon, 14 Jul 2025 08:57:35 -0700 Subject: [PATCH 3/4] use npy_hypot --- CMakeLists.txt | 2 ++ mkl_umath/src/mkl_umath_loops.c.src | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5391aea..6294289 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,6 +111,8 @@ set_target_properties(${_trgt} PROPERTIES C_STANDARD 99 ) target_include_directories(${_trgt} PUBLIC mkl_umath/src/ ${Python_NumPy_INCLUDE_DIRS} ${Python_INCLUDE_DIRS}) +set(NPYMATH_PATH "${Python_NumPy_INCLUDE_DIRS}/../lib") +target_link_libraries(${_trgt} PRIVATE ${NPYMATH_PATH}/libnpymath.a) target_link_libraries(${_trgt} PUBLIC MKL::MKL ${Python_LIBRARIES}) target_link_options(${_trgt} PUBLIC ${_linker_options}) target_compile_options(${_trgt} PUBLIC -fveclib=SVML) diff --git a/mkl_umath/src/mkl_umath_loops.c.src b/mkl_umath/src/mkl_umath_loops.c.src index 2f2d8a9..deaab37 100644 --- a/mkl_umath/src/mkl_umath_loops.c.src +++ b/mkl_umath/src/mkl_umath_loops.c.src @@ -2695,7 +2695,7 @@ mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_int UNARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; const @ftype@ in1i = ((@ftype@ *)ip1)[1]; - *((@ftype@ *)op1) = hypot@c@(in1r, in1i); + *((@ftype@ *)op1) = npy_hypot@c@(in1r, in1i); } } if(ignore_fpstatus) { From 4b5aa364c78659abd2ba664019fbca68e6450af2 Mon Sep 17 00:00:00 2001 From: Vahid Tavanashad Date: Mon, 14 Jul 2025 09:34:52 -0700 Subject: [PATCH 4/4] Revert "use npy_hypot" This reverts commit 9bff9c115bf575479f900797ffbb87cd03e1dce8. --- CMakeLists.txt | 2 -- mkl_umath/src/mkl_umath_loops.c.src | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6294289..5391aea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,8 +111,6 @@ set_target_properties(${_trgt} PROPERTIES C_STANDARD 99 ) target_include_directories(${_trgt} PUBLIC mkl_umath/src/ ${Python_NumPy_INCLUDE_DIRS} ${Python_INCLUDE_DIRS}) -set(NPYMATH_PATH "${Python_NumPy_INCLUDE_DIRS}/../lib") -target_link_libraries(${_trgt} PRIVATE ${NPYMATH_PATH}/libnpymath.a) target_link_libraries(${_trgt} PUBLIC MKL::MKL ${Python_LIBRARIES}) target_link_options(${_trgt} PUBLIC ${_linker_options}) target_compile_options(${_trgt} PUBLIC -fveclib=SVML) diff --git a/mkl_umath/src/mkl_umath_loops.c.src b/mkl_umath/src/mkl_umath_loops.c.src index deaab37..2f2d8a9 100644 --- a/mkl_umath/src/mkl_umath_loops.c.src +++ b/mkl_umath/src/mkl_umath_loops.c.src @@ -2695,7 +2695,7 @@ mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_int UNARY_LOOP { const @ftype@ in1r = ((@ftype@ *)ip1)[0]; const @ftype@ in1i = ((@ftype@ *)ip1)[1]; - *((@ftype@ *)op1) = npy_hypot@c@(in1r, in1i); + *((@ftype@ *)op1) = hypot@c@(in1r, in1i); } } if(ignore_fpstatus) {