From a2a53cb0ab36c2fe88a50084b80399d0cc6afb5d Mon Sep 17 00:00:00 2001 From: dixyes Date: Wed, 1 Dec 2021 13:18:34 +0800 Subject: [PATCH 1/8] Port zend_cpuid for windows arm64 --- Zend/zend_cpuinfo.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Zend/zend_cpuinfo.c b/Zend/zend_cpuinfo.c index 08843a9401f46..53359fffde5bb 100644 --- a/Zend/zend_cpuinfo.c +++ b/Zend/zend_cpuinfo.c @@ -55,8 +55,15 @@ static void __zend_cpuid(uint32_t func, uint32_t subfunc, zend_cpu_info *cpuinfo #endif } # endif -#elif defined(ZEND_WIN32) && !defined(__clang__) -# include +#elif defined(ZEND_WIN32) +# if defined(_M_ARM64) +# include +static void __zend_cpuid(uint32_t func, uint32_t subfunc, zend_cpu_info *cpuinfo) { + // TODO: use registry to do this + cpuinfo->eax = 0; +} +# elif !defined(__clang__) +# include static void __zend_cpuid(uint32_t func, uint32_t subfunc, zend_cpu_info *cpuinfo) { int regs[4]; @@ -67,6 +74,7 @@ static void __zend_cpuid(uint32_t func, uint32_t subfunc, zend_cpu_info *cpuinfo cpuinfo->ecx = regs[2]; cpuinfo->edx = regs[3]; } +# endif #else static void __zend_cpuid(uint32_t func, uint32_t subfunc, zend_cpu_info *cpuinfo) { cpuinfo->eax = 0; From ba53063194a90ae51a24793ec9b92416062cbb60 Mon Sep 17 00:00:00 2001 From: dixyes Date: Wed, 20 Jul 2022 23:09:54 +0800 Subject: [PATCH 2/8] Fix zend_atomic windows arm64 build --- Zend/zend_atomic.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Zend/zend_atomic.h b/Zend/zend_atomic.h index afc210e6336d5..c10bb97e4f71f 100644 --- a/Zend/zend_atomic.h +++ b/Zend/zend_atomic.h @@ -57,6 +57,13 @@ BEGIN_EXTERN_C() #if ZEND_WIN32 +#ifndef InterlockedExchange8 +#define InterlockedExchange8 _InterlockedExchange8 +#endif +#ifndef InterlockedOr8 +#define InterlockedOr8 _InterlockedOr8 +#endif + #define ZEND_ATOMIC_BOOL_INIT(obj, desired) ((obj)->value = (desired)) static zend_always_inline bool zend_atomic_bool_exchange_ex(zend_atomic_bool *obj, bool desired) { From 496266dce3bb8563c59a9593807c96febcd7d735 Mon Sep 17 00:00:00 2001 From: dixyes Date: Sat, 23 Jul 2022 13:14:29 +0800 Subject: [PATCH 3/8] Fix windows arm64 multiply --- Zend/zend_multiply.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Zend/zend_multiply.h b/Zend/zend_multiply.h index c6cd472a237b1..a99e858bd7798 100644 --- a/Zend/zend_multiply.h +++ b/Zend/zend_multiply.h @@ -94,6 +94,19 @@ (dval) = (double)(a) * (double)(b); \ } \ } while (0) +# elif defined(_M_ARM64) +# pragma intrinsic(__mulh) +# define ZEND_SIGNED_MULTIPLY_LONG(a, b, lval, dval, usedval) do { \ + __int64 __high = __mulh((a), (b)); \ + __int64 __low = (a) * (b); \ + if ((__low >> 63I64) == __high) { \ + (usedval) = 0; \ + (lval) = __low; \ + } else { \ + (usedval) = 1; \ + (dval) = (double)(a) * (double)(b); \ + } \ +} while (0) # else # define ZEND_SIGNED_MULTIPLY_LONG(a, b, lval, dval, usedval) do { \ zend_long __lres = (a) * (b); \ From 100311cc535858f73d48178e0397fcac47556bcb Mon Sep 17 00:00:00 2001 From: dixyes Date: Sat, 23 Jul 2022 14:19:24 +0800 Subject: [PATCH 4/8] Enable arm64 neon for windows in standard extension --- ext/standard/base64.c | 12 ++++++------ ext/standard/string.c | 31 ++++++++++++++++++++++--------- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/ext/standard/base64.c b/ext/standard/base64.c index 7a237d09ecc2e..3893438839aee 100644 --- a/ext/standard/base64.c +++ b/ext/standard/base64.c @@ -51,7 +51,7 @@ static const short base64_reverse_table[256] = { }; /* }}} */ -#ifdef __aarch64__ +#if defined(__aarch64__) || defined(_M_ARM64) #include static zend_always_inline uint8x16_t encode_toascii(const uint8x16_t input, const uint8x16x2_t shift_LUT) @@ -118,11 +118,11 @@ static zend_always_inline unsigned char *neon_base64_encode(const unsigned char *left = inl; return out; } -#endif /* __aarch64__ */ +#endif /* defined(__aarch64__) || defined(_M_ARM64) */ static zend_always_inline unsigned char *php_base64_encode_impl(const unsigned char *in, size_t inl, unsigned char *out) /* {{{ */ { -#ifdef __aarch64__ +#if defined(__aarch64__) || defined(_M_ARM64) if (inl >= 16 * 3) { size_t left = 0; out = neon_base64_encode(in, inl, out, &left); @@ -161,7 +161,7 @@ static zend_always_inline unsigned char *php_base64_encode_impl(const unsigned c } /* }}} */ -#ifdef __aarch64__ +#if defined(__aarch64__) || defined(_M_ARM64) static zend_always_inline uint8x16_t decode_fromascii(const uint8x16_t input, uint8x16_t *error, const uint8x16x2_t shiftLUT, const uint8x16x2_t maskLUT, const uint8x16x2_t bitposLUT) { const uint8x16_t higher_nibble = vshrq_n_u8(input, 4); const uint8x16_t lower_nibble = vandq_u8(input, vdupq_n_u8(0x0f)); @@ -241,14 +241,14 @@ static zend_always_inline size_t neon_base64_decode(const unsigned char *in, siz *left = inl; return out - out_orig; } -#endif /* __aarch64__ */ +#endif /* defined(__aarch64__) || defined(_M_ARM64) */ static zend_always_inline int php_base64_decode_impl(const unsigned char *in, size_t inl, unsigned char *out, size_t *outl, bool strict) /* {{{ */ { int ch; size_t i = 0, padding = 0, j = *outl; -#ifdef __aarch64__ +#if defined(__aarch64__) || defined(_M_ARM64) if (inl >= 16 * 4) { size_t left = 0; j += neon_base64_decode(in, inl, out, &left); diff --git a/ext/standard/string.c b/ext/standard/string.c index faeeebd2c5b8f..e53f96ee1ba2b 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -3338,7 +3338,7 @@ PHP_FUNCTION(strtr) /* {{{ Reverse a string */ #ifdef ZEND_INTRIN_SSSE3_NATIVE #include -#elif defined(__aarch64__) +#elif defined(__aarch64__) || defined(_M_ARM64) #include #endif PHP_FUNCTION(strrev) @@ -3385,6 +3385,19 @@ PHP_FUNCTION(strrev) e -= 16; } while (e - s > 15); } +#elif defined(_M_ARM64) + if (e - s > 15) { + do { + const __n128 str = vld1q_u8((uint8_t *)(e - 15)); + /* Synthesize rev128 with a rev64 + ext. */ + /* strange force cast limit on windows: you cannot convert anything */ + const __n128 rev = vrev64q_u8(str); + const __n128 ext = vextq_u64(rev, rev, 1); + vst1q_u8((uint8_t *)p, ext); + p += 16; + e -= 16; + } while (e - s > 15); + } #endif while (e >= s) { *p++ = *e--; @@ -3864,7 +3877,7 @@ zend_string *php_addslashes_sse42(zend_string *str) /* }}} */ #endif -#ifdef __aarch64__ +#if defined(__aarch64__) || defined(_M_ARM64) typedef union { uint8_t mem[16]; uint64_t dw[2]; @@ -3899,7 +3912,7 @@ static zend_always_inline char *aarch64_add_slashes(quad_word res, const char *s } return target; } -#endif /* __aarch64__ */ +#endif /* defined(__aarch64__) || defined(_M_ARM64) */ #ifndef ZEND_INTRIN_SSE4_2_NATIVE # ifdef ZEND_INTRIN_SSE4_2_RESOLVER @@ -3921,7 +3934,7 @@ PHPAPI zend_string *php_addslashes(zend_string *str) source = ZSTR_VAL(str); end = source + ZSTR_LEN(str); -# ifdef __aarch64__ +# if defined(__aarch64__) || defined(_M_ARM64) quad_word res = {0}; if (ZSTR_LEN(str) > 15) { do { @@ -3932,7 +3945,7 @@ PHPAPI zend_string *php_addslashes(zend_string *str) } while ((end - source) > 15); } /* Finish the last 15 bytes or less with the scalar loop. */ -# endif /* __aarch64__ */ +# endif /* defined(__aarch64__) || defined(_M_ARM64) */ while (source < end) { switch (*source) { @@ -3955,7 +3968,7 @@ PHPAPI zend_string *php_addslashes(zend_string *str) memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset); target = ZSTR_VAL(new_str) + offset; -# ifdef __aarch64__ +# if defined(__aarch64__) || defined(_M_ARM64) if (res.dw[0] | res.dw[1]) { target = aarch64_add_slashes(res, source, target); source += 16; @@ -3971,7 +3984,7 @@ PHPAPI zend_string *php_addslashes(zend_string *str) } } /* Finish the last 15 bytes or less with the scalar loop. */ -# endif /* __aarch64__ */ +# endif /* defined(__aarch64__) || defined(_M_ARM64) */ while (source < end) { switch (*source) { @@ -4010,7 +4023,7 @@ PHPAPI zend_string *php_addslashes(zend_string *str) * be careful, this edits the string in-place */ static zend_always_inline char *php_stripslashes_impl(const char *str, char *out, size_t len) { -#ifdef __aarch64__ +#if defined(__aarch64__) || defined(_M_ARM64) while (len > 15) { uint8x16_t x = vld1q_u8((uint8_t *)str); quad_word q; @@ -4040,7 +4053,7 @@ static zend_always_inline char *php_stripslashes_impl(const char *str, char *out } } /* Finish the last 15 bytes or less with the scalar loop. */ -#endif /* __aarch64__ */ +#endif /* defined(__aarch64__) || defined(_M_ARM64) */ while (len > 0) { if (*str == '\\') { str++; /* skip the slash */ From e82edbde21e3f97a30af80c6cc3d76358764552a Mon Sep 17 00:00:00 2001 From: dixyes Date: Sat, 23 Jul 2022 14:20:25 +0800 Subject: [PATCH 5/8] Enable arm64 neon for windows in zend_hash.c --- Zend/zend_hash.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Zend/zend_hash.c b/Zend/zend_hash.c index ffdb0e0385cd4..a4cc298012dc9 100644 --- a/Zend/zend_hash.c +++ b/Zend/zend_hash.c @@ -22,7 +22,7 @@ #include "zend_globals.h" #include "zend_variables.h" -#if defined(__aarch64__) +#if defined(__aarch64__) || defined(_M_ARM64) # include #endif @@ -183,7 +183,7 @@ static zend_always_inline void zend_hash_real_init_mixed_ex(HashTable *ht) _mm_storeu_si128((__m128i*)&HT_HASH_EX(data, 8), xmm0); _mm_storeu_si128((__m128i*)&HT_HASH_EX(data, 12), xmm0); } while (0); -#elif defined(__aarch64__) +#elif defined(__aarch64__) || defined(_M_ARM64) do { int32x4_t t = vdupq_n_s32(-1); vst1q_s32((int32_t*)&HT_HASH_EX(data, 0), t); From a58e5ce03432fe3c68833b977b2cc582adde949d Mon Sep 17 00:00:00 2001 From: dixyes Date: Sat, 23 Jul 2022 18:25:52 +0800 Subject: [PATCH 6/8] Use ZEND_ENABLE_ZVAL_LONG64 --- ext/opcache/ZendAccelerator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/opcache/ZendAccelerator.c b/ext/opcache/ZendAccelerator.c index 0478e8067e314..901caf696ca8d 100644 --- a/ext/opcache/ZendAccelerator.c +++ b/ext/opcache/ZendAccelerator.c @@ -2190,7 +2190,7 @@ zend_op_array *persistent_compile_file(zend_file_handle *file_handle, int type) ZCSG(hits)++; /* TBFixed: may lose one hit */ persistent_script->dynamic_members.hits++; /* see above */ #else -#ifdef _M_X64 +#if ZEND_ENABLE_ZVAL_LONG64 InterlockedIncrement64(&ZCSG(hits)); #else InterlockedIncrement(&ZCSG(hits)); From 18b5b1792e239098ce2ad123836fc6267ff71016 Mon Sep 17 00:00:00 2001 From: dixyes Date: Sat, 23 Jul 2022 18:26:47 +0800 Subject: [PATCH 7/8] Workaround for msvc arm64 optimization bug --- ext/json/json_encoder.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index 13c5ed0aad683..adb53598326bd 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -71,7 +71,14 @@ static inline void php_json_pretty_print_indent(smart_str *buf, int options, php /* }}} */ -static inline bool php_json_is_valid_double(double d) /* {{{ */ +static +#if defined(_MSC_VER) && defined(_M_ARM64) +// MSVC bug: https://developercommunity.visualstudio.com/t/corrupt-optimization-on-arm64-with-Ox-/10102551 +zend_never_inline +#else +inline +#endif +bool php_json_is_valid_double(double d) /* {{{ */ { return !zend_isinf(d) && !zend_isnan(d); } From 00c340b8d4491eda1a9965c60a2cf752d00e587e Mon Sep 17 00:00:00 2001 From: dixyes Date: Sun, 24 Jul 2022 12:57:18 +0800 Subject: [PATCH 8/8] Add clang checks in __zend_cpuid for windows --- Zend/zend_cpuinfo.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/Zend/zend_cpuinfo.c b/Zend/zend_cpuinfo.c index 53359fffde5bb..d02ad4e21813f 100644 --- a/Zend/zend_cpuinfo.c +++ b/Zend/zend_cpuinfo.c @@ -28,15 +28,15 @@ typedef struct _zend_cpu_info { static zend_cpu_info cpuinfo = {0}; -#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) -# if defined(HAVE_CPUID_H) && defined(HAVE_CPUID_COUNT) -# include +#if (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__)) +# if defined(HAVE_CPUID_H) && defined(HAVE_CPUID_COUNT) /* use cpuid.h functions */ +# include static void __zend_cpuid(uint32_t func, uint32_t subfunc, zend_cpu_info *cpuinfo) { __cpuid_count(func, subfunc, cpuinfo->eax, cpuinfo->ebx, cpuinfo->ecx, cpuinfo->edx); } -# else +# else /* use inline asm */ static void __zend_cpuid(uint32_t func, uint32_t subfunc, zend_cpu_info *cpuinfo) { -#if defined(__i386__) && (defined(__pic__) || defined(__PIC__)) +# if defined(__i386__) && (defined(__pic__) || defined(__PIC__)) /* PIC on i386 uses %ebx, so preserve it. */ __asm__ __volatile__ ( "pushl %%ebx\n" @@ -46,24 +46,17 @@ static void __zend_cpuid(uint32_t func, uint32_t subfunc, zend_cpu_info *cpuinfo : "=a"(cpuinfo->eax), "=r"(cpuinfo->ebx), "=c"(cpuinfo->ecx), "=d"(cpuinfo->edx) : "a"(func), "c"(subfunc) ); -#else +# else __asm__ __volatile__ ( "cpuid" : "=a"(cpuinfo->eax), "=b"(cpuinfo->ebx), "=c"(cpuinfo->ecx), "=d"(cpuinfo->edx) : "a"(func), "c"(subfunc) ); -#endif +# endif } # endif -#elif defined(ZEND_WIN32) -# if defined(_M_ARM64) -# include -static void __zend_cpuid(uint32_t func, uint32_t subfunc, zend_cpu_info *cpuinfo) { - // TODO: use registry to do this - cpuinfo->eax = 0; -} -# elif !defined(__clang__) -# include +#elif defined(_MSC_VER) && !defined(__clang__) && (defined(_M_X64) || defined(_M_IX86)) /* use MSVC __cpuidex intrin */ +# include static void __zend_cpuid(uint32_t func, uint32_t subfunc, zend_cpu_info *cpuinfo) { int regs[4]; @@ -74,8 +67,7 @@ static void __zend_cpuid(uint32_t func, uint32_t subfunc, zend_cpu_info *cpuinfo cpuinfo->ecx = regs[2]; cpuinfo->edx = regs[3]; } -# endif -#else +#else /* fail back to zero */ static void __zend_cpuid(uint32_t func, uint32_t subfunc, zend_cpu_info *cpuinfo) { cpuinfo->eax = 0; }