From f50a7503f094319e3d521d814e9f6f5cdf76c09d Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 24 Aug 2020 20:09:08 +0300 Subject: [PATCH 1/2] add fast path for x % 1 and x % -1 --- std/assembly/math.ts | 10 + tests/compiler/binary.optimized.wat | 289 +------------------- tests/compiler/binary.untouched.wat | 26 ++ tests/compiler/std/math.optimized.wat | 26 ++ tests/compiler/std/math.untouched.wat | 26 ++ tests/compiler/std/mod.optimized.wat | 26 ++ tests/compiler/std/mod.untouched.wat | 26 ++ tests/compiler/std/typedarray.untouched.wat | 26 ++ 8 files changed, 174 insertions(+), 281 deletions(-) diff --git a/std/assembly/math.ts b/std/assembly/math.ts index 1d79beeff6..d0a0e5459f 100644 --- a/std/assembly/math.ts +++ b/std/assembly/math.ts @@ -1567,6 +1567,11 @@ export namespace NativeMath { } export function mod(x: f64, y: f64): f64 { // see: musl/src/math/fmod.c + // TODO: move this rule to compiler's optimization pass + if (builtin_abs(y) == 1.0) { + // x % 1, x % -1 ==> sign(x) * abs(x - trunc(x)) + return builtin_copysign(x - builtin_trunc(x), x); + } var ux = reinterpret(x); var uy = reinterpret(y); var ex = (ux >> 52 & 0x7FF); @@ -2871,6 +2876,11 @@ export namespace NativeMathf { } export function mod(x: f32, y: f32): f32 { // see: musl/src/math/fmodf.c + // TODO: move this rule to compiler's optimization pass + if (builtin_abs(y) == 1.0) { + // x % 1, x % -1 ==> sign(x) * abs(x - trunc(x)) + return builtin_copysign(x - builtin_trunc(x), x); + } var ux = reinterpret(x); var uy = reinterpret(y); var ex = (ux >> 23 & 0xFF); diff --git a/tests/compiler/binary.optimized.wat b/tests/compiler/binary.optimized.wat index 77b7d23daf..818d1ff040 100644 --- a/tests/compiler/binary.optimized.wat +++ b/tests/compiler/binary.optimized.wat @@ -43,293 +43,20 @@ local.get $2 ) (func $~lib/math/NativeMathf.mod (param $0 f32) (result f32) - (local $1 i32) - (local $2 i32) - (local $3 i32) - (local $4 i32) local.get $0 - i32.reinterpret_f32 - local.tee $1 - i32.const 
-2147483648 - i32.and - local.set $4 - local.get $1 - i32.const 23 - i32.shr_u - i32.const 255 - i32.and - local.tee $2 - i32.const 255 - i32.eq - if - local.get $0 - local.get $0 - f32.div - return - end - block $folding-inner0 - local.get $1 - i32.const 1 - i32.shl - local.tee $3 - i32.const 2130706432 - i32.le_u - if - local.get $3 - i32.const 2130706432 - i32.eq - br_if $folding-inner0 - local.get $0 - return - end - local.get $2 - if (result i32) - local.get $1 - i32.const 8388607 - i32.and - i32.const 8388608 - i32.or - else - local.get $1 - i32.const 1 - local.get $2 - local.get $1 - i32.const 9 - i32.shl - i32.clz - i32.sub - local.tee $2 - i32.sub - i32.shl - end - local.set $1 - loop $while-continue|0 - local.get $2 - i32.const 127 - i32.gt_s - if - local.get $1 - i32.const 8388608 - i32.ge_u - if (result i32) - local.get $1 - i32.const 8388608 - i32.eq - br_if $folding-inner0 - local.get $1 - i32.const 8388608 - i32.sub - else - local.get $1 - end - i32.const 1 - i32.shl - local.set $1 - local.get $2 - i32.const 1 - i32.sub - local.set $2 - br $while-continue|0 - end - end - local.get $1 - i32.const 8388608 - i32.ge_u - if - local.get $1 - i32.const 8388608 - i32.eq - br_if $folding-inner0 - local.get $1 - i32.const 8388608 - i32.sub - local.set $1 - end - local.get $1 - local.get $1 - i32.const 8 - i32.shl - i32.clz - local.tee $3 - i32.shl - local.set $1 - local.get $2 - local.get $3 - i32.sub - local.tee $2 - i32.const 0 - i32.gt_s - if (result i32) - local.get $1 - i32.const 8388608 - i32.sub - local.get $2 - i32.const 23 - i32.shl - i32.or - else - local.get $1 - i32.const 1 - local.get $2 - i32.sub - i32.shr_u - end - local.get $4 - i32.or - f32.reinterpret_i32 - return - end - f32.const 0 local.get $0 - f32.mul + f32.trunc + f32.sub + local.get $0 + f32.copysign ) (func $~lib/math/NativeMath.mod (param $0 f64) (result f64) - (local $1 i64) - (local $2 i64) - (local $3 i64) - (local $4 i64) local.get $0 - i64.reinterpret_f64 - local.tee $1 - 
i64.const 63 - i64.shr_u - local.set $4 - local.get $1 - i64.const 52 - i64.shr_u - i64.const 2047 - i64.and - local.tee $2 - i64.const 2047 - i64.eq - if - local.get $0 - local.get $0 - f64.div - return - end - block $folding-inner0 - local.get $1 - i64.const 1 - i64.shl - local.tee $3 - i64.const 9214364837600034816 - i64.le_u - if - local.get $3 - i64.const 9214364837600034816 - i64.eq - br_if $folding-inner0 - local.get $0 - return - end - local.get $2 - i64.eqz - if (result i64) - local.get $1 - i64.const 0 - local.get $2 - local.get $1 - i64.const 12 - i64.shl - i64.clz - i64.sub - local.tee $2 - i64.sub - i64.const 1 - i64.add - i64.shl - else - local.get $1 - i64.const 4503599627370495 - i64.and - i64.const 4503599627370496 - i64.or - end - local.set $1 - loop $while-continue|0 - local.get $2 - i64.const 1023 - i64.gt_s - if - local.get $1 - i64.const 4503599627370496 - i64.ge_u - if (result i64) - local.get $1 - i64.const 4503599627370496 - i64.eq - br_if $folding-inner0 - local.get $1 - i64.const 4503599627370496 - i64.sub - else - local.get $1 - end - i64.const 1 - i64.shl - local.set $1 - local.get $2 - i64.const 1 - i64.sub - local.set $2 - br $while-continue|0 - end - end - local.get $1 - i64.const 4503599627370496 - i64.ge_u - if - local.get $1 - i64.const 4503599627370496 - i64.eq - br_if $folding-inner0 - local.get $1 - i64.const 4503599627370496 - i64.sub - local.set $1 - end - local.get $1 - local.get $1 - i64.const 11 - i64.shl - i64.clz - local.tee $3 - i64.shl - local.set $1 - local.get $2 - local.get $3 - i64.sub - local.tee $2 - i64.const 0 - i64.gt_s - if (result i64) - local.get $1 - i64.const 4503599627370496 - i64.sub - local.get $2 - i64.const 52 - i64.shl - i64.or - else - local.get $1 - i64.const 0 - local.get $2 - i64.sub - i64.const 1 - i64.add - i64.shr_u - end - local.get $4 - i64.const 63 - i64.shl - i64.or - f64.reinterpret_i64 - return - end - f64.const 0 local.get $0 - f64.mul + f64.trunc + f64.sub + local.get $0 + 
f64.copysign ) (func $start:binary global.get $binary/i diff --git a/tests/compiler/binary.untouched.wat b/tests/compiler/binary.untouched.wat index 383098988f..a7a522d0ef 100644 --- a/tests/compiler/binary.untouched.wat +++ b/tests/compiler/binary.untouched.wat @@ -1214,6 +1214,19 @@ (local $9 i32) (local $10 i32) (local $11 i32) + local.get $1 + f32.abs + f32.const 1 + f32.eq + if + local.get $0 + local.get $0 + f32.trunc + f32.sub + local.get $0 + f32.copysign + return + end local.get $0 i32.reinterpret_f32 local.set $2 @@ -2076,6 +2089,19 @@ (local $9 i64) (local $10 i32) (local $11 i64) + local.get $1 + f64.abs + f64.const 1 + f64.eq + if + local.get $0 + local.get $0 + f64.trunc + f64.sub + local.get $0 + f64.copysign + return + end local.get $0 i64.reinterpret_f64 local.set $2 diff --git a/tests/compiler/std/math.optimized.wat b/tests/compiler/std/math.optimized.wat index 2a0b6d8bc7..c28d5fba12 100644 --- a/tests/compiler/std/math.optimized.wat +++ b/tests/compiler/std/math.optimized.wat @@ -6661,6 +6661,19 @@ (local $7 i64) (local $8 i64) local.get $1 + f64.abs + f64.const 1 + f64.eq + if + local.get $0 + local.get $0 + f64.trunc + f64.sub + local.get $0 + f64.copysign + return + end + local.get $1 i64.reinterpret_f64 local.tee $3 i64.const 52 @@ -6877,6 +6890,19 @@ (local $7 i32) (local $8 i32) local.get $1 + f32.abs + f32.const 1 + f32.eq + if + local.get $0 + local.get $0 + f32.trunc + f32.sub + local.get $0 + f32.copysign + return + end + local.get $1 i32.reinterpret_f32 local.tee $3 i32.const 23 diff --git a/tests/compiler/std/math.untouched.wat b/tests/compiler/std/math.untouched.wat index 24502947e0..7becd266bf 100644 --- a/tests/compiler/std/math.untouched.wat +++ b/tests/compiler/std/math.untouched.wat @@ -9167,6 +9167,19 @@ (local $9 i64) (local $10 i32) (local $11 i64) + local.get $1 + f64.abs + f64.const 1 + f64.eq + if + local.get $0 + local.get $0 + f64.trunc + f64.sub + local.get $0 + f64.copysign + return + end local.get $0 
i64.reinterpret_f64 local.set $2 @@ -9447,6 +9460,19 @@ (local $9 i32) (local $10 i32) (local $11 i32) + local.get $1 + f32.abs + f32.const 1 + f32.eq + if + local.get $0 + local.get $0 + f32.trunc + f32.sub + local.get $0 + f32.copysign + return + end local.get $0 i32.reinterpret_f32 local.set $2 diff --git a/tests/compiler/std/mod.optimized.wat b/tests/compiler/std/mod.optimized.wat index 2bc12c1b9c..17c732c475 100644 --- a/tests/compiler/std/mod.optimized.wat +++ b/tests/compiler/std/mod.optimized.wat @@ -22,6 +22,19 @@ (local $7 i64) (local $8 i64) local.get $1 + f64.abs + f64.const 1 + f64.eq + if + local.get $0 + local.get $0 + f64.trunc + f64.sub + local.get $0 + f64.copysign + return + end + local.get $1 i64.reinterpret_f64 local.tee $3 i64.const 52 @@ -263,6 +276,19 @@ (local $7 i32) (local $8 i32) local.get $1 + f32.abs + f32.const 1 + f32.eq + if + local.get $0 + local.get $0 + f32.trunc + f32.sub + local.get $0 + f32.copysign + return + end + local.get $1 i32.reinterpret_f32 local.tee $3 i32.const 23 diff --git a/tests/compiler/std/mod.untouched.wat b/tests/compiler/std/mod.untouched.wat index 4a34255f27..942c93ec43 100644 --- a/tests/compiler/std/mod.untouched.wat +++ b/tests/compiler/std/mod.untouched.wat @@ -27,6 +27,19 @@ (local $9 i64) (local $10 i32) (local $11 i64) + local.get $1 + f64.abs + f64.const 1 + f64.eq + if + local.get $0 + local.get $0 + f64.trunc + f64.sub + local.get $0 + f64.copysign + return + end local.get $0 i64.reinterpret_f64 local.set $2 @@ -331,6 +344,19 @@ (local $9 i32) (local $10 i32) (local $11 i32) + local.get $1 + f32.abs + f32.const 1 + f32.eq + if + local.get $0 + local.get $0 + f32.trunc + f32.sub + local.get $0 + f32.copysign + return + end local.get $0 i32.reinterpret_f32 local.set $2 diff --git a/tests/compiler/std/typedarray.untouched.wat b/tests/compiler/std/typedarray.untouched.wat index 6c73fd6d49..09e9e6da94 100644 --- a/tests/compiler/std/typedarray.untouched.wat +++ 
b/tests/compiler/std/typedarray.untouched.wat @@ -19021,6 +19021,19 @@ (local $9 i32) (local $10 i32) (local $11 i32) + local.get $1 + f32.abs + f32.const 1 + f32.eq + if + local.get $0 + local.get $0 + f32.trunc + f32.sub + local.get $0 + f32.copysign + return + end local.get $0 i32.reinterpret_f32 local.set $2 @@ -19435,6 +19448,19 @@ (local $9 i64) (local $10 i32) (local $11 i64) + local.get $1 + f64.abs + f64.const 1 + f64.eq + if + local.get $0 + local.get $0 + f64.trunc + f64.sub + local.get $0 + f64.copysign + return + end local.get $0 i64.reinterpret_f64 local.set $2 From 61c9308bf616c1cdfa92bc9d05c80037eb73b709 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Mon, 24 Aug 2020 23:30:43 +0300 Subject: [PATCH 2/2] add more detailed comments --- std/assembly/math.ts | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/std/assembly/math.ts b/std/assembly/math.ts index d0a0e5459f..7ff2c4ca58 100644 --- a/std/assembly/math.ts +++ b/std/assembly/math.ts @@ -1567,9 +1567,10 @@ export namespace NativeMath { } export function mod(x: f64, y: f64): f64 { // see: musl/src/math/fmod.c - // TODO: move this rule to compiler's optimization pass if (builtin_abs(y) == 1.0) { - // x % 1, x % -1 ==> sign(x) * abs(x - trunc(x)) + // x % 1, x % -1 ==> sign(x) * abs(x - 1.0 * trunc(x / 1.0)) + // TODO: move this rule to compiler's optimization pass. + // It could be applied to any x % C_pot, where "C_pot" is a power-of-two constant. return builtin_copysign(x - builtin_trunc(x), x); } var ux = reinterpret(x); @@ -2876,9 +2877,10 @@ export namespace NativeMathf { } export function mod(x: f32, y: f32): f32 { // see: musl/src/math/fmodf.c - // TODO: move this rule to compiler's optimization pass if (builtin_abs(y) == 1.0) { - // x % 1, x % -1 ==> sign(x) * abs(x - trunc(x)) + // x % 1, x % -1 ==> sign(x) * abs(x - 1.0 * trunc(x / 1.0)) + // TODO: move this rule to compiler's optimization pass. + // It could be applied to any x % C_pot, where "C_pot" is a power-of-two constant.
return builtin_copysign(x - builtin_trunc(x), x); } var ux = reinterpret(x);