Skip to content

Commit 83726bb

Browse files
authored
Add utility functions for string to integer conversions (#366)
* Add utility functions, improve integer conversion functions - move `is_be()` to cutils.h - add `is_upper_ascii()` and `to_upper_ascii()` - add extensive benchmark for integer conversion variants in **tests/test_conv.c** - add `u32toa()`, `i32toa()`, `u64toa()`, `i64toa()` based on register shift variant - add `u32toa_radix()`, `u64toa_radix()`, `i64toa_radix()` based on length_loop variant - use direct converters instead of `snprintf()` - copy NaN and Infinity directly in `js_dtoa1()` - optimize `js_number_toString()` for small integers - use `JS_NewStringLen()` instead of `JS_NewString()` when possible - add more precise conversion tests in microbench.js - disable some benchmark tests for gcc (they cause ASAN failures)
1 parent f326a7a commit 83726bb

File tree

7 files changed

+2078
-91
lines changed

7 files changed

+2078
-91
lines changed

CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,9 @@ if(BUILD_EXAMPLES AND NOT WIN32)
324324
target_link_libraries(test_fib ${qjs_libs})
325325
endif()
326326

327+
add_executable(test_conv
328+
tests/test_conv.c
329+
)
327330

328331
# Install target
329332
#

Makefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ $(QJS): $(BUILD_DIR)
5353
$(QJSC): $(BUILD_DIR)
5454
cmake --build $(BUILD_DIR) --target qjsc -j $(JOBS)
5555

56+
$(BUILD_DIR)/test_conv: $(BUILD_DIR) tests/test_conv.c
57+
cmake --build $(BUILD_DIR) --target test_conv
58+
5659
install: $(QJS) $(QJSC)
5760
cmake --build $(BUILD_DIR) --target install
5861

@@ -86,6 +89,9 @@ test: $(QJS)
8689
$(QJS) tests/test_worker.js
8790
$(QJS) tests/test_queue_microtask.js
8891

92+
testconv: $(BUILD_DIR)/test_conv
93+
$(BUILD_DIR)/test_conv
94+
8995
test262: $(QJS)
9096
$(RUN262) -m -c test262.conf -a
9197

cutils.c

Lines changed: 230 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,8 @@ void dbuf_free(DynBuf *s)
213213
memset(s, 0, sizeof(*s));
214214
}
215215

216+
/*---- Unicode / UTF-8 utility functions ----*/
217+
216218
/* Note: at most 31 bits are encoded. At most UTF8_CHAR_LEN_MAX bytes
217219
are output. */
218220
int unicode_to_utf8(uint8_t *buf, unsigned int c)
@@ -315,6 +317,231 @@ int unicode_from_utf8(const uint8_t *p, int max_len, const uint8_t **pp)
315317
return c;
316318
}
317319

320+
/*---- Integer to string conversions ----*/
321+
322+
/* All conversion functions:
323+
- require a destination array `buf` of sufficient length
324+
- write the string representation at the beginning of `buf`
325+
- null terminate the string
326+
- return the string length
327+
*/
328+
329+
/* 2 <= base <= 36 */
330+
/* Digit characters for values 0..35: '0'-'9' followed by lowercase
   'a'-'z'.  The array holds exactly 36 characters, so the string
   literal's terminating NUL is not stored: index it, do not treat it as
   a C string. */
char const digits36[36] = "0123456789abcdefghijklmnopqrstuvwxyz";
331+
332+
/* using u32toa_shift variant */
333+
334+
/* Push one character `c` into the integer accumulator `buf`.
   Digits are produced least significant first, so each new digit must
   end up *before* the previous ones in memory:
   - little endian: shift the accumulator left and put `c` in the low
     byte (the low byte is the first byte in memory);
   - big endian: shift the accumulator right and put `c` in the top
     byte (the top byte is the first byte in memory).
   `buf` must be a modifiable unsigned integer lvalue; bytes that were
   never written stay zero.
   The body is wrapped in do { } while (0) so that the macro expands to
   a single statement and cannot capture a following `else`. */
#define gen_digit(buf, c) do {                                              \
        if (is_be())                                                        \
            (buf) = ((buf) >> 8) |                                          \
                    ((uint64_t)(c) << ((sizeof(buf) - 1) * 8));             \
        else                                                                \
            (buf) = ((buf) << 8) | (c);                                     \
    } while (0)
338+
339+
/* Convert `n` to decimal and store it at the start of `dest`.
   The digits are packed into a 64-bit word with gen_digit() and the
   whole word (8 bytes) is then copied to `dest` in one go; the zero
   bytes left in the word provide the terminator.  This only yields a
   terminated string when `n` has at most 7 decimal digits.
   Returns the number of digits written. */
size_t u7toa_shift(char dest[minimum_length(8)], uint32_t n)
{
    uint64_t acc = 0;
    size_t ndigits;

    /* emit every digit except the most significant one */
    for (ndigits = 1; n >= 10; ndigits++) {
        uint32_t digit = n % 10;
        n /= 10;
        gen_digit(acc, '0' + digit);
    }
    /* most significant digit */
    gen_digit(acc, '0' + n);
    memcpy(dest, &acc, sizeof acc);
    return ndigits;
}
353+
354+
/* Append `n` as exactly 7 zero-padded decimal digits at offset `len`
   of `dest`, followed by a null terminator (8 bytes are written
   starting at dest + len).  The leading position receives whatever is
   left of `n` after six divisions by 10, so `n` is expected to be
   below 10^7.
   Returns the new string length, len + 7. */
size_t u07toa_shift(char dest[minimum_length(8)], uint32_t n, size_t len)
{
    char *out = dest + len;
    int pos;

    out[7] = '\0';
    /* fill positions 6 down to 1 from the least significant digit up */
    for (pos = 6; pos >= 1; pos--) {
        uint32_t digit = n % 10;
        n /= 10;
        out[pos] = (char)('0' + digit);
    }
    out[0] = (char)('0' + n);
    return len + 7;
}
367+
368+
/* Write the decimal representation of `n` into `buf`, null terminated.
   Returns the string length (1 to 10). */
size_t u32toa(char buf[minimum_length(11)], uint32_t n)
{
    /* fast path: single digit */
    if (n < 10) {
        buf[0] = (char)('0' + n);
        buf[1] = '\0';
        return 1;
    }
#define TEN_POW_7 10000000
    /* up to 7 digits: a single packed conversion is enough */
    if (n < TEN_POW_7)
        return u7toa_shift(buf, n);

    /* 8 to 10 digits: leading 1-3 digits, then 7 zero-padded digits */
    uint32_t head = n / TEN_POW_7;
    uint32_t tail = n % TEN_POW_7;
    size_t len = u7toa_shift(buf, head);
    return u07toa_shift(buf, tail, len);
}
384+
385+
size_t u64toa(char buf[minimum_length(21)], uint64_t n)
386+
{
387+
if (likely(n < 0x100000000))
388+
return u32toa(buf, n);
389+
390+
size_t len;
391+
if (n >= TEN_POW_7) {
392+
uint64_t n1 = n / TEN_POW_7;
393+
n %= TEN_POW_7;
394+
if (n1 >= TEN_POW_7) {
395+
uint32_t quo = n1 / TEN_POW_7;
396+
n1 %= TEN_POW_7;
397+
len = u7toa_shift(buf, quo);
398+
len = u07toa_shift(buf, n1, len);
399+
} else {
400+
len = u7toa_shift(buf, n1);
401+
}
402+
return u07toa_shift(buf, n, len);
403+
}
404+
return u7toa_shift(buf, n);
405+
}
406+
407+
/* Write the decimal representation of the signed value `n` into `buf`,
   null terminated, with a leading '-' for negative values.
   Returns the string length, sign included. */
size_t i32toa(char buf[minimum_length(12)], int32_t n)
{
    if (likely(n >= 0))
        return u32toa(buf, n);

    /* Negate in unsigned arithmetic so that INT32_MIN does not overflow. */
    uint32_t magnitude = -(uint32_t)n;
    *buf = '-';
    return 1 + u32toa(&buf[1], magnitude);
}
415+
416+
/* Write the decimal representation of the signed value `n` into `buf`,
   null terminated, with a leading '-' for negative values.
   Returns the string length, sign included. */
size_t i64toa(char buf[minimum_length(22)], int64_t n)
{
    if (likely(n >= 0))
        return u64toa(buf, n);

    /* Negate in unsigned arithmetic so that INT64_MIN does not overflow. */
    uint64_t magnitude = -(uint64_t)n;
    *buf = '-';
    return 1 + u64toa(&buf[1], magnitude);
}
424+
425+
/* using u32toa_radix_length variant */
426+
427+
/* radix_shift[b] is log2(b) when b is a power of two in 2..32, and 0
   for every other index.  A non-zero entry lets the radix converters
   extract digits with a mask and a shift instead of % and /. */
static uint8_t const radix_shift[64] = {
    [2]  = 1,
    [4]  = 2,
    [8]  = 3,
    [16] = 4,
    [32] = 5,
};
433+
434+
size_t u32toa_radix(char buf[minimum_length(33)], uint32_t n, unsigned base)
435+
{
436+
#ifdef USE_SPECIAL_RADIX_10
437+
if (likely(base == 10))
438+
return u32toa(buf, n);
439+
#endif
440+
if (n < base) {
441+
buf[0] = digits36[n];
442+
buf[1] = '\0';
443+
return 1;
444+
}
445+
int shift = radix_shift[base & 63];
446+
if (shift) {
447+
uint32_t mask = (1 << shift) - 1;
448+
size_t len = (32 - clz32(n) + shift - 1) / shift;
449+
size_t last = n & mask;
450+
n /= base;
451+
char *end = buf + len;
452+
*end-- = '\0';
453+
*end-- = digits36[last];
454+
while (n >= base) {
455+
size_t quo = n & mask;
456+
n >>= shift;
457+
*end-- = digits36[quo];
458+
}
459+
*end = digits36[n];
460+
return len;
461+
} else {
462+
size_t len = 2;
463+
size_t last = n % base;
464+
n /= base;
465+
uint32_t nbase = base;
466+
while (n >= nbase) {
467+
nbase *= base;
468+
len++;
469+
}
470+
char *end = buf + len;
471+
*end-- = '\0';
472+
*end-- = digits36[last];
473+
while (n >= base) {
474+
size_t quo = n % base;
475+
n /= base;
476+
*end-- = digits36[quo];
477+
}
478+
*end = digits36[n];
479+
return len;
480+
}
481+
}
482+
483+
size_t u64toa_radix(char buf[minimum_length(65)], uint64_t n, unsigned base)
484+
{
485+
#ifdef USE_SPECIAL_RADIX_10
486+
if (likely(base == 10))
487+
return u64toa(buf, n);
488+
#endif
489+
int shift = radix_shift[base & 63];
490+
if (shift) {
491+
if (n < base) {
492+
buf[0] = digits36[n];
493+
buf[1] = '\0';
494+
return 1;
495+
}
496+
uint64_t mask = (1 << shift) - 1;
497+
size_t len = (64 - clz64(n) + shift - 1) / shift;
498+
size_t last = n & mask;
499+
n /= base;
500+
char *end = buf + len;
501+
*end-- = '\0';
502+
*end-- = digits36[last];
503+
while (n >= base) {
504+
size_t quo = n & mask;
505+
n >>= shift;
506+
*end-- = digits36[quo];
507+
}
508+
*end = digits36[n];
509+
return len;
510+
} else {
511+
if (likely(n < 0x100000000))
512+
return u32toa_radix(buf, n, base);
513+
size_t last = n % base;
514+
n /= base;
515+
uint64_t nbase = base;
516+
size_t len = 2;
517+
while (n >= nbase) {
518+
nbase *= base;
519+
len++;
520+
}
521+
char *end = buf + len;
522+
*end-- = '\0';
523+
*end-- = digits36[last];
524+
while (n >= base) {
525+
size_t quo = n % base;
526+
n /= base;
527+
*end-- = digits36[quo];
528+
}
529+
*end = digits36[n];
530+
return len;
531+
}
532+
}
533+
534+
/* Write the representation of the signed value `n` in radix `base`
   into `buf`, null terminated, with a leading '-' for negative values.
   Returns the string length, sign included. */
size_t i64toa_radix(char buf[minimum_length(66)], int64_t n, unsigned int base)
{
    if (likely(n >= 0))
        return u64toa_radix(buf, n, base);

    /* Negate in unsigned arithmetic so that INT64_MIN does not overflow. */
    uint64_t magnitude = -(uint64_t)n;
    *buf = '-';
    return 1 + u64toa_radix(&buf[1], magnitude, base);
}
542+
543+
/*---- sorting with opaque argument ----*/
544+
318545
/* Pointer to a function taking two untyped object pointers and a byte
   count; judging by the name it presumably swaps `size` bytes between
   `a` and `b` (its users are outside this excerpt, to be confirmed). */
typedef void (*exchange_f)(void *a, void *b, size_t size);
319546
/* Comparison callback: receives two element pointers plus the caller's
   `opaque` pointer and returns an int.  This is the type of the `cmp`
   parameter of rqsort(); the sign convention is presumably the same as
   for qsort() comparators (to be confirmed). */
typedef int (*cmp_f)(const void *, const void *, void *opaque);
320547

@@ -614,6 +841,8 @@ void rqsort(void *base, size_t nmemb, size_t size, cmp_f cmp, void *opaque)
614841
}
615842
}
616843

844+
/*---- Portable time functions ----*/
845+
617846
#if defined(_MSC_VER)
618847
// From: https://stackoverflow.com/a/26085827
619848
static int gettimeofday_msvc(struct timeval *tp, struct timezone *tzp)
@@ -677,7 +906,7 @@ int64_t js__gettimeofday_us(void) {
677906
return ((int64_t)tv.tv_sec * 1000000) + tv.tv_usec;
678907
}
679908

680-
/* Cross-platform threading APIs. */
909+
/*---- Cross-platform threading APIs ----*/
681910

682911
#if !defined(EMSCRIPTEN) && !defined(__wasi__)
683912

cutils.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,14 @@ char *pstrcat(char *buf, int buf_size, const char *s);
131131
int strstart(const char *str, const char *val, const char **ptr);
132132
int has_suffix(const char *str, const char *suffix);
133133

134+
/* Return 1 on a big endian host, 0 on a little endian one.
   The 16-bit value 0x0100 is stored in a union and its first byte in
   memory is read back: that byte is the high byte (1) on big endian
   and the low byte (0) on little endian. */
static inline uint8_t is_be(void) {
    union {
        uint16_t word;
        uint8_t first_byte;
    } probe = { 0x0100 };

    return probe.first_byte;
}
141+
134142
static inline int max_int(int a, int b)
135143
{
136144
if (a > b)
@@ -426,6 +434,23 @@ static inline int from_hex(int c)
426434
return -1;
427435
}
428436

437+
/* Return 1 if `c` is in the range 'A'..'Z', 0 otherwise. */
static inline uint8_t is_upper_ascii(uint8_t c) {
    if (c < 'A')
        return 0;
    return c <= 'Z';
}
440+
441+
/* Map 'a'..'z' to 'A'..'Z'; every other value is returned unchanged. */
static inline uint8_t to_upper_ascii(uint8_t c) {
    if (c < 'a' || c > 'z')
        return c;
    return (uint8_t)(c - ('a' - 'A'));
}
444+
445+
/* Digit characters for values 0..35 ('0'-'9' then 'a'-'z'); exactly 36
   bytes, not null terminated. */
extern const char digits36[36];

/* Integer to string conversions.
   Each function writes a null terminated string at the start of `buf`
   and returns its length.  The array size in each prototype is the
   minimum size of the destination. */

/* decimal */
size_t u32toa(char buf[minimum_length(11)], uint32_t n);
size_t i32toa(char buf[minimum_length(12)], int32_t n);
size_t u64toa(char buf[minimum_length(21)], uint64_t n);
size_t i64toa(char buf[minimum_length(22)], int64_t n);

/* arbitrary radix, 2 <= base <= 36, lowercase digits */
size_t u32toa_radix(char buf[minimum_length(33)], uint32_t n, unsigned int base);
size_t u64toa_radix(char buf[minimum_length(65)], uint64_t n, unsigned int base);
size_t i64toa_radix(char buf[minimum_length(66)], int64_t n, unsigned int base);
453+
429454
void rqsort(void *base, size_t nmemb, size_t size,
430455
int (*cmp)(const void *, const void *, void *),
431456
void *arg);

0 commit comments

Comments
 (0)