Skip to content

Add utility functions for string to integer conversions #366

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Apr 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,9 @@ if(BUILD_EXAMPLES AND NOT WIN32)
target_link_libraries(test_fib ${qjs_libs})
endif()

add_executable(test_conv
tests/test_conv.c
)

# Install target
#
Expand Down
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ $(QJS): $(BUILD_DIR)
$(QJSC): $(BUILD_DIR)
cmake --build $(BUILD_DIR) --target qjsc -j $(JOBS)

$(BUILD_DIR)/test_conv: $(BUILD_DIR) tests/test_conv.c
cmake --build $(BUILD_DIR) --target test_conv

install: $(QJS) $(QJSC)
cmake --build $(BUILD_DIR) --target install

Expand Down Expand Up @@ -86,6 +89,9 @@ test: $(QJS)
$(QJS) tests/test_worker.js
$(QJS) tests/test_queue_microtask.js

testconv: $(BUILD_DIR)/test_conv
$(BUILD_DIR)/test_conv

test262: $(QJS)
$(RUN262) -m -c test262.conf -a

Expand Down
231 changes: 230 additions & 1 deletion cutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,8 @@ void dbuf_free(DynBuf *s)
memset(s, 0, sizeof(*s));
}

/*--- Unicode / UTF-8 utility functions --*/

/* Note: at most 31 bits are encoded. At most UTF8_CHAR_LEN_MAX bytes
are output. */
int unicode_to_utf8(uint8_t *buf, unsigned int c)
Expand Down Expand Up @@ -315,6 +317,231 @@ int unicode_from_utf8(const uint8_t *p, int max_len, const uint8_t **pp)
return c;
}

/*--- integer to string conversions --*/

/* All conversion functions:
- require a destination array `buf` of sufficient length
- write the string representation at the beginning of `buf`
- null terminate the string
- return the string length
*/

/* 2 <= base <= 36 */
char const digits36[36] = "0123456789abcdefghijklmnopqrstuvwxyz";

/* using u32toa_shift variant */

#define gen_digit(buf, c) if (is_be()) \
buf = (buf >> 8) | ((uint64_t)(c) << ((sizeof(buf) - 1) * 8)); \
else \
buf = (buf << 8) | (c)

size_t u7toa_shift(char dest[minimum_length(8)], uint32_t n)
{
size_t len = 1;
uint64_t buf = 0;
while (n >= 10) {
uint32_t quo = n % 10;
n /= 10;
gen_digit(buf, '0' + quo);
len++;
}
gen_digit(buf, '0' + n);
memcpy(dest, &buf, sizeof buf);
return len;
}

size_t u07toa_shift(char dest[minimum_length(8)], uint32_t n, size_t len)
{
size_t i;
dest += len;
dest[7] = '\0';
for (i = 7; i-- > 1;) {
uint32_t quo = n % 10;
n /= 10;
dest[i] = (char)('0' + quo);
}
dest[i] = (char)('0' + n);
return len + 7;
}

size_t u32toa(char buf[minimum_length(11)], uint32_t n)
{
if (n < 10) {
buf[0] = (char)('0' + n);
buf[1] = '\0';
return 1;
}
#define TEN_POW_7 10000000
if (n >= TEN_POW_7) {
uint32_t quo = n / TEN_POW_7;
n %= TEN_POW_7;
size_t len = u7toa_shift(buf, quo);
return u07toa_shift(buf, n, len);
}
return u7toa_shift(buf, n);
}

size_t u64toa(char buf[minimum_length(21)], uint64_t n)
{
if (likely(n < 0x100000000))
return u32toa(buf, n);

size_t len;
if (n >= TEN_POW_7) {
uint64_t n1 = n / TEN_POW_7;
n %= TEN_POW_7;
if (n1 >= TEN_POW_7) {
uint32_t quo = n1 / TEN_POW_7;
n1 %= TEN_POW_7;
len = u7toa_shift(buf, quo);
len = u07toa_shift(buf, n1, len);
} else {
len = u7toa_shift(buf, n1);
}
return u07toa_shift(buf, n, len);
}
return u7toa_shift(buf, n);
}

size_t i32toa(char buf[minimum_length(12)], int32_t n)
{
if (likely(n >= 0))
return u32toa(buf, n);

buf[0] = '-';
return 1 + u32toa(buf + 1, -(uint32_t)n);
}

size_t i64toa(char buf[minimum_length(22)], int64_t n)
{
if (likely(n >= 0))
return u64toa(buf, n);

buf[0] = '-';
return 1 + u64toa(buf + 1, -(uint64_t)n);
}

/* using u32toa_radix_length variant */

static uint8_t const radix_shift[64] = {
0, 0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0,
4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};

size_t u32toa_radix(char buf[minimum_length(33)], uint32_t n, unsigned base)
{
#ifdef USE_SPECIAL_RADIX_10
if (likely(base == 10))
return u32toa(buf, n);
#endif
if (n < base) {
buf[0] = digits36[n];
buf[1] = '\0';
return 1;
}
int shift = radix_shift[base & 63];
if (shift) {
uint32_t mask = (1 << shift) - 1;
size_t len = (32 - clz32(n) + shift - 1) / shift;
size_t last = n & mask;
n /= base;
char *end = buf + len;
*end-- = '\0';
*end-- = digits36[last];
while (n >= base) {
size_t quo = n & mask;
n >>= shift;
*end-- = digits36[quo];
}
*end = digits36[n];
return len;
} else {
size_t len = 2;
size_t last = n % base;
n /= base;
uint32_t nbase = base;
while (n >= nbase) {
nbase *= base;
len++;
}
char *end = buf + len;
*end-- = '\0';
*end-- = digits36[last];
while (n >= base) {
size_t quo = n % base;
n /= base;
*end-- = digits36[quo];
}
*end = digits36[n];
return len;
}
}

size_t u64toa_radix(char buf[minimum_length(65)], uint64_t n, unsigned base)
{
#ifdef USE_SPECIAL_RADIX_10
if (likely(base == 10))
return u64toa(buf, n);
#endif
int shift = radix_shift[base & 63];
if (shift) {
if (n < base) {
buf[0] = digits36[n];
buf[1] = '\0';
return 1;
}
uint64_t mask = (1 << shift) - 1;
size_t len = (64 - clz64(n) + shift - 1) / shift;
size_t last = n & mask;
n /= base;
char *end = buf + len;
*end-- = '\0';
*end-- = digits36[last];
while (n >= base) {
size_t quo = n & mask;
n >>= shift;
*end-- = digits36[quo];
}
*end = digits36[n];
return len;
} else {
if (likely(n < 0x100000000))
return u32toa_radix(buf, n, base);
size_t last = n % base;
n /= base;
uint64_t nbase = base;
size_t len = 2;
while (n >= nbase) {
nbase *= base;
len++;
}
char *end = buf + len;
*end-- = '\0';
*end-- = digits36[last];
while (n >= base) {
size_t quo = n % base;
n /= base;
*end-- = digits36[quo];
}
*end = digits36[n];
return len;
}
}

size_t i64toa_radix(char buf[minimum_length(66)], int64_t n, unsigned int base)
{
if (likely(n >= 0))
return u64toa_radix(buf, n, base);

buf[0] = '-';
return 1 + u64toa_radix(buf + 1, -(uint64_t)n, base);
}

/*---- sorting with opaque argument ----*/

typedef void (*exchange_f)(void *a, void *b, size_t size);
typedef int (*cmp_f)(const void *, const void *, void *opaque);

Expand Down Expand Up @@ -614,6 +841,8 @@ void rqsort(void *base, size_t nmemb, size_t size, cmp_f cmp, void *opaque)
}
}

/*---- Portable time functions ----*/

#if defined(_MSC_VER)
// From: https://stackoverflow.com/a/26085827
static int gettimeofday_msvc(struct timeval *tp, struct timezone *tzp)
Expand Down Expand Up @@ -677,7 +906,7 @@ int64_t js__gettimeofday_us(void) {
return ((int64_t)tv.tv_sec * 1000000) + tv.tv_usec;
}

/* Cross-platform threading APIs. */
/*--- Cross-platform threading APIs. ----*/

#if !defined(EMSCRIPTEN) && !defined(__wasi__)

Expand Down
25 changes: 25 additions & 0 deletions cutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,14 @@ char *pstrcat(char *buf, int buf_size, const char *s);
int strstart(const char *str, const char *val, const char **ptr);
int has_suffix(const char *str, const char *suffix);

static inline uint8_t is_be(void) {
union {
uint16_t a;
uint8_t b;
} u = { 0x100 };
return u.b;
}

static inline int max_int(int a, int b)
{
if (a > b)
Expand Down Expand Up @@ -426,6 +434,23 @@ static inline int from_hex(int c)
return -1;
}

static inline uint8_t is_upper_ascii(uint8_t c) {
return c >= 'A' && c <= 'Z';
}

static inline uint8_t to_upper_ascii(uint8_t c) {
return c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c;
}

extern char const digits36[36];
size_t u32toa(char buf[minimum_length(11)], uint32_t n);
size_t i32toa(char buf[minimum_length(12)], int32_t n);
size_t u64toa(char buf[minimum_length(21)], uint64_t n);
size_t i64toa(char buf[minimum_length(22)], int64_t n);
size_t u32toa_radix(char buf[minimum_length(33)], uint32_t n, unsigned int base);
size_t u64toa_radix(char buf[minimum_length(65)], uint64_t n, unsigned int base);
size_t i64toa_radix(char buf[minimum_length(66)], int64_t n, unsigned int base);

void rqsort(void *base, size_t nmemb, size_t size,
int (*cmp)(const void *, const void *, void *),
void *arg);
Expand Down
Loading