Skip to content

Commit a0c4f85

Browse files
[libc] Change ctype to be encoding independent (#110574)
The previous implementation of the ctype functions assumed ASCII. This patch changes them to a switch/case implementation that looks odd but is actually easier for the compiler to understand and optimize.
1 parent e0ae779 commit a0c4f85

33 files changed

+915
-185
lines changed

libc/src/__support/ctype_utils.h

Lines changed: 546 additions & 23 deletions
Large diffs are not rendered by default.

libc/src/__support/high_precision_decimal.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -178,9 +178,11 @@ class HighPrecisionDecimal {
178178
if (digit_index >= this->num_digits) {
179179
return new_digits - 1;
180180
}
181-
if (this->digits[digit_index] != power_of_five[digit_index] - '0') {
181+
if (this->digits[digit_index] !=
182+
internal::b36_char_to_int(power_of_five[digit_index])) {
182183
return new_digits -
183-
((this->digits[digit_index] < power_of_five[digit_index] - '0')
184+
((this->digits[digit_index] <
185+
internal::b36_char_to_int(power_of_five[digit_index]))
184186
? 1
185187
: 0);
186188
}
@@ -337,8 +339,8 @@ class HighPrecisionDecimal {
337339
}
338340
++total_digits;
339341
if (this->num_digits < MAX_NUM_DIGITS) {
340-
this->digits[this->num_digits] =
341-
static_cast<uint8_t>(num_string[num_cur] - '0');
342+
this->digits[this->num_digits] = static_cast<uint8_t>(
343+
internal::b36_char_to_int(num_string[num_cur]));
342344
++this->num_digits;
343345
} else if (num_string[num_cur] != '0') {
344346
this->truncated = true;

libc/src/__support/integer_literals.h

Lines changed: 8 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,13 @@
1313
#ifndef LLVM_LIBC_SRC___SUPPORT_INTEGER_LITERALS_H
1414
#define LLVM_LIBC_SRC___SUPPORT_INTEGER_LITERALS_H
1515

16-
#include "src/__support/CPP/limits.h" // CHAR_BIT
16+
#include "src/__support/CPP/limits.h" // CHAR_BIT
17+
#include "src/__support/ctype_utils.h"
1718
#include "src/__support/macros/attributes.h" // LIBC_INLINE
1819
#include "src/__support/macros/config.h"
19-
#include "src/__support/uint128.h" // UInt128
20-
#include <stddef.h> // size_t
21-
#include <stdint.h> // uintxx_t
20+
#include "src/__support/uint128.h" // UInt128
21+
#include <stddef.h> // size_t
22+
#include <stdint.h> // uintxx_t
2223

2324
namespace LIBC_NAMESPACE_DECL {
2425

@@ -75,26 +76,13 @@ template <typename T, int base> struct DigitBuffer {
7576
push(*str);
7677
}
7778

78-
// Returns the digit for a particular character.
79-
// Returns INVALID_DIGIT if the character is invalid.
80-
LIBC_INLINE static constexpr uint8_t get_digit_value(const char c) {
81-
const auto to_lower = [](char c) { return c | 32; };
82-
const auto is_digit = [](char c) { return c >= '0' && c <= '9'; };
83-
const auto is_alpha = [](char c) {
84-
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
85-
};
86-
if (is_digit(c))
87-
return static_cast<uint8_t>(c - '0');
88-
if (base > 10 && is_alpha(c))
89-
return static_cast<uint8_t>(to_lower(c) - 'a' + 10);
90-
return INVALID_DIGIT;
91-
}
92-
9379
// Adds a single character to this buffer.
9480
LIBC_INLINE constexpr void push(char c) {
9581
if (c == '\'')
9682
return; // ' is valid but not taken into account.
97-
const uint8_t value = get_digit_value(c);
83+
const int b36_val = internal::b36_char_to_int(c);
84+
const uint8_t value = static_cast<uint8_t>(
85+
b36_val < base && (b36_val != 0 || c == '0') ? b36_val : INVALID_DIGIT);
9886
if (value == INVALID_DIGIT || size >= MAX_DIGITS) {
9987
// During constant evaluation `__builtin_unreachable` will halt the
10088
// compiler as it is not executable. This is preferable over `assert` that

libc/src/__support/integer_to_string.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
#include "src/__support/CPP/type_traits.h"
7070
#include "src/__support/big_int.h" // make_integral_or_big_int_unsigned_t
7171
#include "src/__support/common.h"
72+
#include "src/__support/ctype_utils.h"
7273
#include "src/__support/macros/config.h"
7374

7475
namespace LIBC_NAMESPACE_DECL {
@@ -214,9 +215,9 @@ template <typename T, typename Fmt = radix::Dec> class IntegerToString {
214215
using UNSIGNED_T = make_integral_or_big_int_unsigned_t<T>;
215216

216217
LIBC_INLINE static char digit_char(uint8_t digit) {
217-
if (digit < 10)
218-
return '0' + static_cast<char>(digit);
219-
return (Fmt::IS_UPPERCASE ? 'A' : 'a') + static_cast<char>(digit - 10);
218+
const int result = internal::int_to_b36_char(digit);
219+
return static_cast<char>(Fmt::IS_UPPERCASE ? internal::toupper(result)
220+
: result);
220221
}
221222

222223
LIBC_INLINE static void

libc/src/__support/str_to_float.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -909,7 +909,7 @@ decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT,
909909
cpp::numeric_limits<StorageType>::max() / BASE;
910910
while (true) {
911911
if (isdigit(src[index])) {
912-
uint32_t digit = src[index] - '0';
912+
uint32_t digit = b36_char_to_int(src[index]);
913913
seen_digit = true;
914914

915915
if (mantissa < bitstype_max_div_by_base) {

libc/src/__support/str_to_integer.h

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,22 +42,14 @@ first_non_whitespace(const char *__restrict src,
4242
return src + src_cur;
4343
}
4444

45-
LIBC_INLINE int b36_char_to_int(char input) {
46-
if (isdigit(input))
47-
return input - '0';
48-
if (isalpha(input))
49-
return (input | 32) + 10 - 'a';
50-
return 0;
51-
}
52-
5345
// checks if the next 3 characters of the string pointer are the start of a
5446
// hexadecimal number. Does not advance the string pointer.
5547
LIBC_INLINE bool
5648
is_hex_start(const char *__restrict src,
5749
size_t src_len = cpp::numeric_limits<size_t>::max()) {
5850
if (src_len < 3)
5951
return false;
60-
return *src == '0' && (*(src + 1) | 32) == 'x' && isalnum(*(src + 2)) &&
52+
return *src == '0' && tolower(*(src + 1)) == 'x' && isalnum(*(src + 2)) &&
6153
b36_char_to_int(*(src + 2)) < 16;
6254
}
6355

libc/src/ctype/isxdigit.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ namespace LIBC_NAMESPACE_DECL {
1616

1717
LLVM_LIBC_FUNCTION(int, isxdigit, (int c)) {
1818
const unsigned ch = static_cast<unsigned>(c);
19-
return static_cast<int>(internal::isdigit(ch) || (ch | 32) - 'a' < 6);
19+
return static_cast<int>(internal::isalnum(ch) &&
20+
internal::b36_char_to_int(ch) < 16);
2021
}
2122

2223
} // namespace LIBC_NAMESPACE_DECL

libc/src/ctype/isxdigit_l.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ namespace LIBC_NAMESPACE_DECL {
1616

1717
LLVM_LIBC_FUNCTION(int, isxdigit_l, (int c, locale_t)) {
1818
const unsigned ch = static_cast<unsigned>(c);
19-
return static_cast<int>(internal::isdigit(ch) || (ch | 32) - 'a' < 6);
19+
return static_cast<int>(internal::isalnum(ch) &&
20+
internal::b36_char_to_int(ch) < 16);
2021
}
2122

2223
} // namespace LIBC_NAMESPACE_DECL

libc/src/ctype/toupper.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,6 @@
1414

1515
namespace LIBC_NAMESPACE_DECL {
1616

17-
LLVM_LIBC_FUNCTION(int, toupper, (int c)) {
18-
if (internal::islower(c))
19-
return c - ('a' - 'A');
20-
return c;
21-
}
17+
LLVM_LIBC_FUNCTION(int, toupper, (int c)) { return internal::toupper(c); }
2218

2319
} // namespace LIBC_NAMESPACE_DECL

libc/src/ctype/toupper_l.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,7 @@
1515
namespace LIBC_NAMESPACE_DECL {
1616

1717
LLVM_LIBC_FUNCTION(int, toupper_l, (int c, locale_t)) {
18-
if (internal::islower(c))
19-
return c - ('a' - 'A');
20-
return c;
18+
return internal::toupper(c);
2119
}
2220

2321
} // namespace LIBC_NAMESPACE_DECL

0 commit comments

Comments
 (0)