From 090707ba50881f9f31ba140f1a3f792b338b5ea7 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Sun, 9 Aug 2020 11:50:54 +0300 Subject: [PATCH 1/4] add joinThreeStrings helper --- std/assembly/util/string.ts | 42 +++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/std/assembly/util/string.ts b/std/assembly/util/string.ts index 76e993ec96..6eb66a85b2 100644 --- a/std/assembly/util/string.ts +++ b/std/assembly/util/string.ts @@ -1,16 +1,17 @@ import { itoa32, utoa32, itoa64, utoa64, dtoa, itoa_buffered, dtoa_buffered, MAX_DOUBLE_LENGTH } from "./number"; import { ipow32 } from "../math"; +import { string } from "string"; // All tables are stored as two staged lookup tables (static tries) // because the full range of Unicode symbols can't be efficiently // represented as-is in memory (see Unicode spec ch 5, p.196): -// https://www.unicode.org/versions/Unicode12.0.0/ch05.pdf +// https://www.unicode.org/versions/Unicode13.0.0/ch05.pdf // Tables have been generated using these forked musl tools: // https://github.com/MaxGraey/musl-chartable-tools/tree/case-ignorable // Lookup table to check if a character is alphanumeric or not // See: https://git.musl-libc.org/cgit/musl/tree/src/ctype/alpha.h -// size: 3904 bytes +// size: 4032 bytes (compressed to ~3500 after binaryen) // @ts-ignore @inline @lazy const ALPHA_TABLE = memory.data([ 18,17,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,17,34,35,36,17,37,38,39,40, @@ -191,7 +192,7 @@ import { ipow32 } from "../math"; 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,3 ]); -// size: 1568 bytes (compressed to ~1380 bytes after binaryen) +// size: 1568 bytes (compressed to ~1300 bytes after binaryen) // @ts-ignore: decorator @lazy @inline const CASED = memory.data([ 18,19,20,21,22,23,16,16,16,16,16,16,16,16,16,16, @@ -274,7 +275,7 @@ import { ipow32 } from "../math"; 0,0,0,0,0,0,0,0 ]); -// size: 2976 bytes (compressed to ~2050 bytes after binaryen) +// size: 2976 bytes (compressed to ~2000 bytes after binaryen) // @ts-ignore: decorator @lazy @inline const CASE_IGNORABLES = memory.data([ 18,16,19,20,21,22,23,24,25,26,27,28,29,30,31,32, @@ -1017,6 +1018,39 @@ export function joinStringArray(dataStart: usize, length: i32, separator: string return changetype(result); // retains } +export function joinThreeStrings(a: string, b: string, c: string): string { + var bytesLenA = a.length << 1; + var bytesLenB = b.length << 1; + var bytesLenC = c.length << 1; + var length = bytesLenA + bytesLenB + bytesLenC; + var result = __alloc(length, idof()); + var offset: usize = 0; + if (bytesLenA) { + memory.copy( + result, + changetype(a), + bytesLenA + ); + offset += bytesLenA; + } + if (bytesLenB) { + memory.copy( + result + offset, + changetype(b), + bytesLenB + ); + offset += bytesLenB; + } + if (bytesLenC) { + memory.copy( + result + offset, + changetype(c), + bytesLenC + ); + } + return changetype(result); // retains +} + export function joinReferenceArray(dataStart: usize, length: i32, separator: string): string { var lastIndex = length - 1; if (lastIndex < 0) return ""; From 1a4f7705800c23d9a38c43617062dd848860f7f5 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Sun, 9 Aug 2020 12:07:44 +0300 Subject: [PATCH 2/4] cleanups --- std/assembly/util/string.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/std/assembly/util/string.ts b/std/assembly/util/string.ts index 6eb66a85b2..9b849c0db7 100644 --- a/std/assembly/util/string.ts +++ b/std/assembly/util/string.ts @@ -1,6 +1,5 @@ import { itoa32, utoa32, itoa64, utoa64, dtoa, itoa_buffered, dtoa_buffered, MAX_DOUBLE_LENGTH } from "./number"; import { ipow32 } from "../math"; -import { string } from "string"; // All tables are stored as two staged lookup tables (static tries) // because the full range of Unicode symbols can't be efficiently @@ -1022,8 +1021,7 @@ export function joinThreeStrings(a: string, b: string, c: string): string { var bytesLenA = a.length << 1; var bytesLenB = b.length << 1; var bytesLenC = c.length << 1; - var length = bytesLenA + bytesLenB + bytesLenC; - var result = __alloc(length, idof()); + var result = __alloc(bytesLenA + bytesLenB + bytesLenC, idof()); var offset: usize = 0; if (bytesLenA) { memory.copy( From 50927886c2d68b822566d9c9a5542c0a0a7e46a3 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Sun, 9 Aug 2020 12:38:49 +0300 Subject: [PATCH 3/4] optimize joinStringArray --- std/assembly/util/string.ts | 18 +++++++++--------- tests/compiler/std/array.untouched.wat | 18 ++++++------------ 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/std/assembly/util/string.ts b/std/assembly/util/string.ts index 9b849c0db7..a3f12751e3 100644 --- a/std/assembly/util/string.ts +++ b/std/assembly/util/string.ts @@ -983,25 +983,25 @@ export function joinStringArray(dataStart: usize, length: i32, separator: string // @ts-ignore: type if (value !== null) estLen += value.length; } - var offset = 0; - var sepLen = separator.length; - var result = __alloc((estLen + sepLen * lastIndex) << 1, idof()); + var offset: usize = 0; + var sepLen = separator.length << 1; + var result = __alloc((estLen << 1) + sepLen * lastIndex, idof()); for (let i = 0; i < lastIndex; ++i) { value = load(dataStart + (i << alignof())); if (value !== null) { - let valueLen = value.length; + let valueLen = value.length << 1; memory.copy( - result + (offset << 1), + result + offset, changetype(value), - valueLen << 1 + valueLen ); offset += valueLen; } if (sepLen) { memory.copy( - result + (offset << 1), + result + offset, changetype(separator), - sepLen << 1 + sepLen ); offset += sepLen; } @@ -1009,7 +1009,7 @@ export function joinStringArray(dataStart: usize, length: i32, separator: string value = load(dataStart + (lastIndex << alignof())); if (value !== null) { memory.copy( - result + (offset << 1), + result + offset, changetype(value), value.length << 1 ); diff --git a/tests/compiler/std/array.untouched.wat b/tests/compiler/std/array.untouched.wat index 347bea9cff..7c88527e6a 100644 --- a/tests/compiler/std/array.untouched.wat +++ b/tests/compiler/std/array.untouched.wat @@ -16072,15 +16072,17 @@ local.set $10 local.get $2 call $~lib/string/String#get:length + i32.const 1 + i32.shl local.set $11 local.get $5 + i32.const 1 + i32.shl local.get $11 local.get $3 i32.mul i32.add i32.const 1 - i32.shl - i32.const 1 call $~lib/rt/tlsf/__alloc local.set $12 i32.const 0 @@ -16117,16 +16119,14 @@ if local.get $6 call $~lib/string/String#get:length + i32.const 1 + i32.shl local.set $9 local.get $12 local.get $10 - i32.const 1 - i32.shl i32.add local.get $6 local.get $9 - i32.const 1 - i32.shl call $~lib/memory/memory.copy local.get $10 local.get $9 @@ -16137,13 +16137,9 @@ if local.get $12 local.get $10 - i32.const 1 - i32.shl i32.add local.get $2 local.get $11 - i32.const 1 - i32.shl call $~lib/memory/memory.copy local.get $10 local.get $11 @@ -16182,8 +16178,6 @@ if local.get $12 local.get $10 - i32.const 1 - i32.shl i32.add local.get $6 local.get $6 From c0831f424e63eff44f372d6335de51cca9f6c856 Mon Sep 17 00:00:00 2001 From: MaxGraey Date: Sun, 9 Aug 2020 21:52:27 +0300 Subject: [PATCH 4/4] rename to concat3 --- std/assembly/util/string.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/std/assembly/util/string.ts b/std/assembly/util/string.ts index a3f12751e3..e812a38c48 100644 --- a/std/assembly/util/string.ts +++ b/std/assembly/util/string.ts @@ -1017,7 +1017,7 @@ export function joinStringArray(dataStart: usize, length: i32, separator: string return changetype(result); // retains } -export function joinThreeStrings(a: string, b: string, c: string): string { +export function concat3(a: string, b: string, c: string): string { var bytesLenA = a.length << 1; var bytesLenB = b.length << 1; var bytesLenC = c.length << 1;