From 79a0b0c0ddd562ae4cc237940ad70e85307cbcfe Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Wed, 25 Sep 2024 14:52:14 -0700 Subject: [PATCH 1/5] [libc] Change ctype to be encoding independent The previous implementation of the ctype functions assumed ASCII. This patch changes to a switch/case implementation that looks odd, but actually is easier for the compiler to understand and optimize. --- libc/src/__support/ctype_utils.h | 298 ++++++++++++++++++++++++++++--- 1 file changed, 275 insertions(+), 23 deletions(-) diff --git a/libc/src/__support/ctype_utils.h b/libc/src/__support/ctype_utils.h index 91f6ce8cabd8d..2e9b23d3bc37f 100644 --- a/libc/src/__support/ctype_utils.h +++ b/libc/src/__support/ctype_utils.h @@ -15,44 +15,296 @@ namespace LIBC_NAMESPACE_DECL { namespace internal { -// ------------------------------------------------------ -// Rationale: Since these classification functions are -// called in other functions, we will avoid the overhead -// of a function call by inlining them. -// ------------------------------------------------------ +// ----------------------------------------------------------------------------- +// ****************** WARNING ****************** +// ****************** DO NOT TRY TO OPTIMIZE THESE FUNCTIONS! ****************** +// ----------------------------------------------------------------------------- +// This switch/case form is easier for the compiler to understand, and is +// optimized into a form that is almost always the same as or better than +// versions written by hand (see https://godbolt.org/z/qvrebqvvr). Also this +// form makes these functions encoding independent. If you want to rewrite these +// functions, make sure you have benchmarks to show your new solution is faster, +// as well as a way to support non-ASCII character encodings. 
-LIBC_INLINE static constexpr bool isalpha(unsigned ch) { - return (ch | 32) - 'a' < 26; +LIBC_INLINE static constexpr bool islower(int ch) { + switch (ch) { + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + return true; + default: + return false; + } } -LIBC_INLINE static constexpr bool isdigit(unsigned ch) { - return (ch - '0') < 10; +LIBC_INLINE static constexpr bool isupper(int ch) { + switch (ch) { + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + return true; + default: + return false; + } } -LIBC_INLINE static constexpr bool isalnum(unsigned ch) { - return isalpha(ch) || isdigit(ch); +LIBC_INLINE static constexpr bool isdigit(int ch) { + switch (ch) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return true; + default: + return false; + } } -LIBC_INLINE static constexpr bool isgraph(unsigned ch) { - return 0x20 < ch && ch < 0x7f; +LIBC_INLINE static constexpr int tolower(int ch) { + switch (ch) { + case 'A': + return 'a'; + case 'B': + return 'b'; + case 'C': + return 'c'; + case 'D': + return 'd'; + case 'E': + return 'e'; + case 'F': + return 'f'; + case 'G': + return 'g'; + case 'H': + return 'h'; + case 'I': + return 'i'; + case 'J': + return 'j'; + case 'K': + return 'k'; + case 'L': + return 'l'; + case 'M': + return 'm'; + case 'N': + return 'n'; + case 'O': + return 'o'; + case 'P': + return 'p'; + case 'Q': + return 'q'; + case 'R': + return 'r'; + case 
'S': + return 's'; + case 'T': + return 't'; + case 'U': + return 'u'; + case 'V': + return 'v'; + case 'W': + return 'w'; + case 'X': + return 'x'; + case 'Y': + return 'y'; + case 'Z': + return 'z'; + default: + return ch; + } +} + +LIBC_INLINE static constexpr bool isalpha(int ch) { + switch (tolower(ch)) { + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + return true; + default: + return false; + } } -LIBC_INLINE static constexpr bool islower(unsigned ch) { - return (ch - 'a') < 26; +LIBC_INLINE static constexpr bool isalnum(int ch) { + return isalpha(ch) || isdigit(ch); } -LIBC_INLINE static constexpr bool isupper(unsigned ch) { - return (ch - 'A') < 26; +LIBC_INLINE static constexpr int b36_char_to_int(int ch) { + switch (tolower(ch)) { + case '0': + return 0; + case '1': + return 1; + case '2': + return 2; + case '3': + return 3; + case '4': + return 4; + case '5': + return 5; + case '6': + return 6; + case '7': + return 7; + case '8': + return 8; + case '9': + return 9; + case 'a': + return 10; + case 'b': + return 11; + case 'c': + return 12; + case 'd': + return 13; + case 'e': + return 14; + case 'f': + return 15; + case 'g': + return 16; + case 'h': + return 17; + case 'i': + return 18; + case 'j': + return 19; + case 'k': + return 20; + case 'l': + return 21; + case 'm': + return 22; + case 'n': + return 23; + case 'o': + return 24; + case 'p': + return 25; + case 'q': + return 26; + case 'r': + return 27; + case 's': + return 28; + case 't': + return 29; + case 'u': + return 30; + case 'v': + return 31; + case 'w': + return 32; + case 'x': + return 33; + case 'y': + return 34; + case 'z': + return 35; + default: + return 0; + } } -LIBC_INLINE static constexpr bool isspace(unsigned ch) { - 
return ch == ' ' || (ch - '\t') < 5; +LIBC_INLINE static constexpr bool isspace(int ch) { + switch (ch) { + case ' ': + case '\t': + case '\n': + case '\v': + case '\f': + case '\r': + return true; + default: + return false; + } } -LIBC_INLINE static constexpr int tolower(int ch) { - if (isupper(ch)) - return ch + ('a' - 'A'); - return ch; +// not yet encoding independent. +LIBC_INLINE static constexpr bool isgraph(int ch) { + return 0x20 < ch && ch < 0x7f; } } // namespace internal From 795ba04531233172c6619fe3e469279ecb0d155d Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Thu, 14 Nov 2024 11:28:48 -0800 Subject: [PATCH 2/5] Move functions to use ctype utils, cleanup --- libc/src/__support/ctype_utils.h | 266 +++++++++++++++++- libc/src/__support/integer_literals.h | 28 +- libc/src/ctype/isxdigit.cpp | 3 +- libc/src/ctype/isxdigit_l.cpp | 3 +- libc/src/stdio/printf_core/fixed_converter.h | 14 +- .../stdio/printf_core/float_dec_converter.h | 6 +- .../stdio/printf_core/float_hex_converter.h | 17 +- .../printf_core/float_inf_nan_converter.h | 9 +- libc/src/stdio/printf_core/int_converter.h | 21 +- libc/src/stdio/scanf_core/converter_utils.h | 10 - libc/src/stdio/scanf_core/float_converter.cpp | 18 +- libc/src/stdio/scanf_core/int_converter.cpp | 12 +- libc/src/stdio/scanf_core/ptr_converter.cpp | 4 +- .../llvm-project-overlay/libc/BUILD.bazel | 2 + 14 files changed, 329 insertions(+), 84 deletions(-) diff --git a/libc/src/__support/ctype_utils.h b/libc/src/__support/ctype_utils.h index 2e9b23d3bc37f..8521857ce765d 100644 --- a/libc/src/__support/ctype_utils.h +++ b/libc/src/__support/ctype_utils.h @@ -171,8 +171,67 @@ LIBC_INLINE static constexpr int tolower(int ch) { } } +LIBC_INLINE static constexpr int toupper(int ch) { + switch (ch) { + case 'a': + return 'A'; + case 'b': + return 'B'; + case 'c': + return 'C'; + case 'd': + return 'D'; + case 'e': + return 'E'; + case 'f': + return 'F'; + case 'g': + return 'G'; + case 'h': + return 'H'; + case 'i': + return 
'I'; + case 'j': + return 'J'; + case 'k': + return 'K'; + case 'l': + return 'L'; + case 'm': + return 'M'; + case 'n': + return 'N'; + case 'o': + return 'O'; + case 'p': + return 'P'; + case 'q': + return 'Q'; + case 'r': + return 'R'; + case 's': + return 'S'; + case 't': + return 'T'; + case 'u': + return 'U'; + case 'v': + return 'V'; + case 'w': + return 'W'; + case 'x': + return 'X'; + case 'y': + return 'Y'; + case 'z': + return 'Z'; + default: + return ch; + } +} + LIBC_INLINE static constexpr bool isalpha(int ch) { - switch (tolower(ch)) { + switch (ch) { case 'a': case 'b': case 'c': @@ -199,6 +258,32 @@ LIBC_INLINE static constexpr bool isalpha(int ch) { case 'x': case 'y': case 'z': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': return true; default: return false; @@ -206,11 +291,77 @@ LIBC_INLINE static constexpr bool isalpha(int ch) { } LIBC_INLINE static constexpr bool isalnum(int ch) { - return isalpha(ch) || isdigit(ch); + switch (ch) { + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return true; + 
default: + return false; + } } LIBC_INLINE static constexpr int b36_char_to_int(int ch) { - switch (tolower(ch)) { + switch (ch) { case '0': return 0; case '1': @@ -232,62 +383,171 @@ LIBC_INLINE static constexpr int b36_char_to_int(int ch) { case '9': return 9; case 'a': + case 'A': return 10; case 'b': + case 'B': return 11; case 'c': + case 'C': return 12; case 'd': + case 'D': return 13; case 'e': + case 'E': return 14; case 'f': + case 'F': return 15; case 'g': + case 'G': return 16; case 'h': + case 'H': return 17; case 'i': + case 'I': return 18; case 'j': + case 'J': return 19; case 'k': + case 'K': return 20; case 'l': + case 'L': return 21; case 'm': + case 'M': return 22; case 'n': + case 'N': return 23; case 'o': + case 'O': return 24; case 'p': + case 'P': return 25; case 'q': + case 'Q': return 26; case 'r': + case 'R': return 27; case 's': + case 'S': return 28; case 't': + case 'T': return 29; case 'u': + case 'U': return 30; case 'v': + case 'V': return 31; case 'w': + case 'W': return 32; case 'x': + case 'X': return 33; case 'y': + case 'Y': return 34; case 'z': + case 'Z': return 35; default: return 0; } } +LIBC_INLINE static constexpr int int_to_b36_char(int num) { + // Can't actually use LIBC_ASSERT here because it depends on integer_to_string + // which depends on this. 
+ + // LIBC_ASSERT(num < 36); + switch (num) { + case 0: + return '0'; + case 1: + return '1'; + case 2: + return '2'; + case 3: + return '3'; + case 4: + return '4'; + case 5: + return '5'; + case 6: + return '6'; + case 7: + return '7'; + case 8: + return '8'; + case 9: + return '9'; + case 10: + return 'a'; + case 11: + return 'b'; + case 12: + return 'c'; + case 13: + return 'd'; + case 14: + return 'e'; + case 15: + return 'f'; + case 16: + return 'g'; + case 17: + return 'h'; + case 18: + return 'i'; + case 19: + return 'j'; + case 20: + return 'k'; + case 21: + return 'l'; + case 22: + return 'm'; + case 23: + return 'n'; + case 24: + return 'o'; + case 25: + return 'p'; + case 26: + return 'q'; + case 27: + return 'r'; + case 28: + return 's'; + case 29: + return 't'; + case 30: + return 'u'; + case 31: + return 'v'; + case 32: + return 'w'; + case 33: + return 'x'; + case 34: + return 'y'; + case 35: + return 'z'; + default: + return '!'; + } +} + LIBC_INLINE static constexpr bool isspace(int ch) { switch (ch) { case ' ': diff --git a/libc/src/__support/integer_literals.h b/libc/src/__support/integer_literals.h index 4c5c4c4166681..0298ec7d088d6 100644 --- a/libc/src/__support/integer_literals.h +++ b/libc/src/__support/integer_literals.h @@ -13,12 +13,13 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_INTEGER_LITERALS_H #define LLVM_LIBC_SRC___SUPPORT_INTEGER_LITERALS_H -#include "src/__support/CPP/limits.h" // CHAR_BIT +#include "src/__support/CPP/limits.h" // CHAR_BIT +#include "src/__support/ctype_utils.h" #include "src/__support/macros/attributes.h" // LIBC_INLINE #include "src/__support/macros/config.h" -#include "src/__support/uint128.h" // UInt128 -#include // size_t -#include // uintxx_t +#include "src/__support/uint128.h" // UInt128 +#include // size_t +#include // uintxx_t namespace LIBC_NAMESPACE_DECL { @@ -75,26 +76,13 @@ template struct DigitBuffer { push(*str); } - // Returns the digit for a particular character. 
- // Returns INVALID_DIGIT if the character is invalid. - LIBC_INLINE static constexpr uint8_t get_digit_value(const char c) { - const auto to_lower = [](char c) { return c | 32; }; - const auto is_digit = [](char c) { return c >= '0' && c <= '9'; }; - const auto is_alpha = [](char c) { - return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'); - }; - if (is_digit(c)) - return static_cast(c - '0'); - if (base > 10 && is_alpha(c)) - return static_cast(to_lower(c) - 'a' + 10); - return INVALID_DIGIT; - } - // Adds a single character to this buffer. LIBC_INLINE constexpr void push(char c) { if (c == '\'') return; // ' is valid but not taken into account. - const uint8_t value = get_digit_value(c); + const int b36_val = internal::b36_char_to_int(c); + const uint8_t value = static_cast( + b36_val < base && (b36_val != 0 || c == '0') ? b36_val : INVALID_DIGIT); if (value == INVALID_DIGIT || size >= MAX_DIGITS) { // During constant evaluation `__builtin_unreachable` will halt the // compiler as it is not executable. 
This is preferable over `assert` that diff --git a/libc/src/ctype/isxdigit.cpp b/libc/src/ctype/isxdigit.cpp index 6b730c354db08..81f645c6f49fc 100644 --- a/libc/src/ctype/isxdigit.cpp +++ b/libc/src/ctype/isxdigit.cpp @@ -16,7 +16,8 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isxdigit, (int c)) { const unsigned ch = static_cast(c); - return static_cast(internal::isdigit(ch) || (ch | 32) - 'a' < 6); + return static_cast(internal::isalnum(ch) && + internal::b36_char_to_int(ch) < 16); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/isxdigit_l.cpp b/libc/src/ctype/isxdigit_l.cpp index 8a5c7d4d28ab1..eddfd20a2da3b 100644 --- a/libc/src/ctype/isxdigit_l.cpp +++ b/libc/src/ctype/isxdigit_l.cpp @@ -16,7 +16,8 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, isxdigit_l, (int c, locale_t)) { const unsigned ch = static_cast(c); - return static_cast(internal::isdigit(ch) || (ch | 32) - 'a' < 6); + return static_cast(internal::isalnum(ch) && + internal::b36_char_to_int(ch) < 16); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/printf_core/fixed_converter.h b/libc/src/stdio/printf_core/fixed_converter.h index c8812d77b62e3..ba0a62d9fcb87 100644 --- a/libc/src/stdio/printf_core/fixed_converter.h +++ b/libc/src/stdio/printf_core/fixed_converter.h @@ -11,6 +11,7 @@ #include "include/llvm-libc-macros/stdfix-macros.h" #include "src/__support/CPP/string_view.h" +#include "src/__support/ctype_utils.h" #include "src/__support/fixed_point/fx_bits.h" #include "src/__support/fixed_point/fx_rep.h" #include "src/__support/integer_to_string.h" @@ -68,10 +69,6 @@ LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) { using LARep = fixed_point::FXRep; using StorageType = LARep::StorageType; - // All of the letters will be defined relative to variable a, which will be - // the appropriate case based on the name of the conversion. This converts any - // conversion name into the letter 'a' with the appropriate case. 
- const char a = (to_conv.conv_name & 32) | 'A'; FormatFlags flags = to_conv.flags; bool is_negative; @@ -179,9 +176,9 @@ LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) { // unspecified. RoundDirection round; char first_digit_after = fraction_digits[precision]; - if (first_digit_after > '5') { + if (internal::b36_char_to_int(first_digit_after) > 5) { round = RoundDirection::Up; - } else if (first_digit_after < '5') { + } else if (internal::b36_char_to_int(first_digit_after) < 5) { round = RoundDirection::Down; } else { // first_digit_after == '5' @@ -204,7 +201,8 @@ LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) { keep_rounding = false; char cur_digit = fraction_digits[digit_to_round]; // if the digit should not be rounded up - if (round == RoundDirection::Even && ((cur_digit - '0') % 2) == 0) { + if (round == RoundDirection::Even && + (internal::b36_char_to_int(cur_digit) % 2) == 0) { // break out of the loop break; } @@ -246,7 +244,7 @@ LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) { char sign_char = 0; // Check if the conv name is uppercase - if (a == 'A') { + if (internal::isupper(to_conv.conv_name)) { // These flags are only for signed conversions, so this removes them if the // conversion is unsigned. 
flags = FormatFlags(flags & diff --git a/libc/src/stdio/printf_core/float_dec_converter.h b/libc/src/stdio/printf_core/float_dec_converter.h index e39ba6ecea8d4..d93457fcafd7f 100644 --- a/libc/src/stdio/printf_core/float_dec_converter.h +++ b/libc/src/stdio/printf_core/float_dec_converter.h @@ -13,6 +13,7 @@ #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/rounding_mode.h" #include "src/__support/big_int.h" // is_big_int_v +#include "src/__support/ctype_utils.h" #include "src/__support/float_to_string.h" #include "src/__support/integer_to_string.h" #include "src/__support/libc_assert.h" @@ -587,8 +588,6 @@ LIBC_INLINE int convert_float_dec_exp_typed(Writer *writer, int exponent = float_bits.get_explicit_exponent(); StorageType mantissa = float_bits.get_explicit_mantissa(); - const char a = (to_conv.conv_name & 32) | 'A'; - char sign_char = 0; if (float_bits.is_neg()) @@ -734,7 +733,8 @@ LIBC_INLINE int convert_float_dec_exp_typed(Writer *writer, round = get_round_direction(last_digit, truncated, float_bits.sign()); RET_IF_RESULT_NEGATIVE(float_writer.write_last_block( - digits, maximum, round, final_exponent, a + 'E' - 'A')); + digits, maximum, round, final_exponent, + internal::islower(to_conv.conv_name) ? 
'e' : 'E')); RET_IF_RESULT_NEGATIVE(float_writer.right_pad()); return WRITE_OK; diff --git a/libc/src/stdio/printf_core/float_hex_converter.h b/libc/src/stdio/printf_core/float_hex_converter.h index 0b3ff3dd1cbfd..5d9c42882a589 100644 --- a/libc/src/stdio/printf_core/float_hex_converter.h +++ b/libc/src/stdio/printf_core/float_hex_converter.h @@ -12,6 +12,7 @@ #include "src/__support/CPP/string_view.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/rounding_mode.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" #include "src/stdio/printf_core/converter_utils.h" #include "src/stdio/printf_core/core_structs.h" @@ -31,7 +32,6 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer, // All of the letters will be defined relative to variable a, which will be // the appropriate case based on the name of the conversion. This converts any // conversion name into the letter 'a' with the appropriate case. - const char a = (to_conv.conv_name & 32) | 'A'; bool is_negative; int exponent; @@ -138,9 +138,11 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer, size_t mant_cur = mant_len; size_t first_non_zero = 1; for (; mant_cur > 0; --mant_cur, mantissa >>= 4) { - char mant_mod_16 = static_cast(mantissa) & 15; - char new_digit = static_cast( - (mant_mod_16 > 9) ? 
(mant_mod_16 - 10 + a) : (mant_mod_16 + '0')); + char mant_mod_16 = static_cast(mantissa % 16); + char new_digit = static_cast(internal::int_to_b36_char(mant_mod_16)); + if (internal::isupper(to_conv.conv_name)) { + new_digit = static_cast(internal::toupper(new_digit)); + } mant_buffer[mant_cur - 1] = new_digit; if (new_digit != '0' && first_non_zero < mant_cur) first_non_zero = mant_cur; @@ -168,7 +170,8 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer, size_t exp_cur = EXP_LEN; for (; exponent > 0; --exp_cur, exponent /= 10) { - exp_buffer[exp_cur - 1] = static_cast((exponent % 10) + '0'); + exp_buffer[exp_cur - 1] = + static_cast(internal::int_to_b36_char(exponent % 10)); } if (exp_cur == EXP_LEN) { // if nothing else was written, write a 0. exp_buffer[EXP_LEN - 1] = '0'; @@ -187,7 +190,7 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer, constexpr size_t PREFIX_LEN = 2; char prefix[PREFIX_LEN]; prefix[0] = '0'; - prefix[1] = a + ('x' - 'a'); + prefix[1] = internal::islower(to_conv.conv_name) ? 'x' : 'X'; const cpp::string_view prefix_str(prefix, PREFIX_LEN); // If the precision is greater than the actual result, pad with 0s @@ -200,7 +203,7 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer, constexpr cpp::string_view HEXADECIMAL_POINT("."); // This is for the letter 'p' before the exponent. - const char exp_separator = a + ('p' - 'a'); + const char exp_separator = internal::islower(to_conv.conv_name) ? 'p' : 'P'; constexpr int EXP_SEPARATOR_LEN = 1; padding = static_cast(to_conv.min_width - (sign_char > 0 ? 
1 : 0) - diff --git a/libc/src/stdio/printf_core/float_inf_nan_converter.h b/libc/src/stdio/printf_core/float_inf_nan_converter.h index a7da682b835be..3e41612e21c9f 100644 --- a/libc/src/stdio/printf_core/float_inf_nan_converter.h +++ b/libc/src/stdio/printf_core/float_inf_nan_converter.h @@ -10,6 +10,7 @@ #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_FLOAT_INF_NAN_CONVERTER_H #include "src/__support/FPUtil/FPBits.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" #include "src/stdio/printf_core/converter_utils.h" #include "src/stdio/printf_core/core_structs.h" @@ -26,8 +27,6 @@ using StorageType = fputil::FPBits::StorageType; LIBC_INLINE int convert_inf_nan(Writer *writer, const FormatSection &to_conv) { // All of the letters will be defined relative to variable a, which will be // the appropriate case based on the case of the conversion. - const char a = (to_conv.conv_name & 32) | 'A'; - bool is_negative; StorageType mantissa; if (to_conv.length_modifier == LengthModifier::L) { @@ -66,9 +65,11 @@ LIBC_INLINE int convert_inf_nan(Writer *writer, const FormatSection &to_conv) { if (sign_char) RET_IF_RESULT_NEGATIVE(writer->write(sign_char)); if (mantissa == 0) { // inf - RET_IF_RESULT_NEGATIVE(writer->write(a == 'a' ? "inf" : "INF")); + RET_IF_RESULT_NEGATIVE( + writer->write(internal::islower(to_conv.conv_name) ? "inf" : "INF")); } else { // nan - RET_IF_RESULT_NEGATIVE(writer->write(a == 'a' ? "nan" : "NAN")); + RET_IF_RESULT_NEGATIVE( + writer->write(internal::islower(to_conv.conv_name) ? 
"nan" : "NAN")); } if (padding > 0 && ((to_conv.flags & FormatFlags::LEFT_JUSTIFIED) == diff --git a/libc/src/stdio/printf_core/int_converter.h b/libc/src/stdio/printf_core/int_converter.h index f345e86b97a69..d0af229f89be5 100644 --- a/libc/src/stdio/printf_core/int_converter.h +++ b/libc/src/stdio/printf_core/int_converter.h @@ -11,6 +11,7 @@ #include "src/__support/CPP/span.h" #include "src/__support/CPP/string_view.h" +#include "src/__support/ctype_utils.h" #include "src/__support/integer_to_string.h" #include "src/__support/macros/config.h" #include "src/stdio/printf_core/converter_utils.h" @@ -23,11 +24,6 @@ namespace LIBC_NAMESPACE_DECL { namespace printf_core { -// These functions only work on characters that are already known to be in the -// alphabet. Their behavior is undefined otherwise. -LIBC_INLINE constexpr char to_lower(char a) { return a | 32; } -LIBC_INLINE constexpr bool is_lower(char a) { return (a & 32) > 0; } - namespace details { using HexFmt = IntegerToString; @@ -49,14 +45,14 @@ LIBC_INLINE constexpr size_t num_buf_size() { LIBC_INLINE cpp::optional num_to_strview(uintmax_t num, cpp::span bufref, char conv_name) { - if (to_lower(conv_name) == 'x') { - if (is_lower(conv_name)) + if (internal::tolower(conv_name) == 'x') { + if (internal::islower(conv_name)) return HexFmt::format_to(bufref, num); else return HexFmtUppercase::format_to(bufref, num); } else if (conv_name == 'o') { return OctFmt::format_to(bufref, num); - } else if (to_lower(conv_name) == 'b') { + } else if (internal::tolower(conv_name) == 'b') { return BinFmt::format_to(bufref, num); } else { return DecFmt::format_to(bufref, num); @@ -72,7 +68,6 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) { uintmax_t num = static_cast(to_conv.conv_val_raw); bool is_negative = false; FormatFlags flags = to_conv.flags; - const char a = is_lower(to_conv.conv_name) ? 'a' : 'A'; // If the conversion is signed, then handle negative values. 
if (to_conv.conv_name == 'd' || to_conv.conv_name == 'i') { @@ -116,16 +111,16 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) { // conversions. Since hexadecimal is unsigned these will never conflict. size_t prefix_len; char prefix[2]; - if ((to_lower(to_conv.conv_name) == 'x') && + if ((internal::tolower(to_conv.conv_name) == 'x') && ((flags & FormatFlags::ALTERNATE_FORM) != 0) && num != 0) { prefix_len = 2; prefix[0] = '0'; - prefix[1] = a + ('x' - 'a'); - } else if ((to_lower(to_conv.conv_name) == 'b') && + prefix[1] = internal::islower(to_conv.conv_name) ? 'x' : 'X'; + } else if ((internal::tolower(to_conv.conv_name) == 'b') && ((flags & FormatFlags::ALTERNATE_FORM) != 0) && num != 0) { prefix_len = 2; prefix[0] = '0'; - prefix[1] = a + ('b' - 'a'); + prefix[1] = internal::islower(to_conv.conv_name) ? 'b' : 'B'; } else { prefix_len = (sign_char == 0 ? 0 : 1); prefix[0] = sign_char; diff --git a/libc/src/stdio/scanf_core/converter_utils.h b/libc/src/stdio/scanf_core/converter_utils.h index 61954556b838a..6f4d16cffb19c 100644 --- a/libc/src/stdio/scanf_core/converter_utils.h +++ b/libc/src/stdio/scanf_core/converter_utils.h @@ -19,16 +19,6 @@ namespace LIBC_NAMESPACE_DECL { namespace scanf_core { -LIBC_INLINE constexpr char to_lower(char a) { return a | 32; } - -LIBC_INLINE constexpr int b36_char_to_int(char input) { - if (internal::isdigit(input)) - return input - '0'; - if (internal::isalpha(input)) - return to_lower(input) + 10 - 'a'; - return 0; -} - LIBC_INLINE void write_int_with_length(uintmax_t output_val, const FormatSection &to_conv) { if ((to_conv.flags & NO_WRITE) != 0) { diff --git a/libc/src/stdio/scanf_core/float_converter.cpp b/libc/src/stdio/scanf_core/float_converter.cpp index b2d60a249a5a7..9c714d0727214 100644 --- a/libc/src/stdio/scanf_core/float_converter.cpp +++ b/libc/src/stdio/scanf_core/float_converter.cpp @@ -55,11 +55,12 @@ int convert_float(Reader *reader, const FormatSection &to_conv) { // Handle inf - 
if (to_lower(cur_char) == inf_string[0]) { + if (internal::tolower(cur_char) == inf_string[0]) { size_t inf_index = 0; - for (; inf_index < sizeof(inf_string) && out_str.length() < max_width && - to_lower(cur_char) == inf_string[inf_index]; + for (; + inf_index < (sizeof(inf_string) - 1) && out_str.length() < max_width && + internal::tolower(cur_char) == inf_string[inf_index]; ++inf_index) { if (!out_str.append(cur_char)) { return ALLOCATION_FAILURE; @@ -78,11 +79,12 @@ int convert_float(Reader *reader, const FormatSection &to_conv) { static const char nan_string[] = "nan"; // Handle nan - if (to_lower(cur_char) == nan_string[0]) { + if (internal::tolower(cur_char) == nan_string[0]) { size_t nan_index = 0; - for (; nan_index < sizeof(nan_string) && out_str.length() < max_width && - to_lower(cur_char) == nan_string[nan_index]; + for (; + nan_index < (sizeof(nan_string) - 1) && out_str.length() < max_width && + internal::tolower(cur_char) == nan_string[nan_index]; ++nan_index) { if (!out_str.append(cur_char)) { return ALLOCATION_FAILURE; @@ -117,7 +119,7 @@ int convert_float(Reader *reader, const FormatSection &to_conv) { } // If that next character is an 'x' then this is a hexadecimal number. - if (to_lower(cur_char) == 'x') { + if (internal::tolower(cur_char) == 'x') { base = 16; if (!out_str.append(cur_char)) { @@ -163,7 +165,7 @@ int convert_float(Reader *reader, const FormatSection &to_conv) { // Handle the exponent, which has an exponent mark, an optional sign, and // decimal digits. 
- if (to_lower(cur_char) == exponent_mark) { + if (internal::tolower(cur_char) == exponent_mark) { if (!out_str.append(cur_char)) { return ALLOCATION_FAILURE; } diff --git a/libc/src/stdio/scanf_core/int_converter.cpp b/libc/src/stdio/scanf_core/int_converter.cpp index ecdac52e84bbd..fce817245c010 100644 --- a/libc/src/stdio/scanf_core/int_converter.cpp +++ b/libc/src/stdio/scanf_core/int_converter.cpp @@ -80,7 +80,8 @@ int convert_int(Reader *reader, const FormatSection &to_conv) { is_signed = true; } else if (to_conv.conv_name == 'o') { base = 8; - } else if (to_lower(to_conv.conv_name) == 'x' || to_conv.conv_name == 'p') { + } else if (internal::tolower(to_conv.conv_name) == 'x' || + to_conv.conv_name == 'p') { base = 16; } else if (to_conv.conv_name == 'd') { base = 10; @@ -122,7 +123,7 @@ int convert_int(Reader *reader, const FormatSection &to_conv) { return READ_OK; } - if (to_lower(cur_char) == 'x') { + if (internal::tolower(cur_char) == 'x') { // This is a valid hex prefix. is_number = false; @@ -175,17 +176,18 @@ int convert_int(Reader *reader, const FormatSection &to_conv) { const uintmax_t max_div_by_base = MAX / base; - if (internal::isalnum(cur_char) && b36_char_to_int(cur_char) < base) { + if (internal::isalnum(cur_char) && + internal::b36_char_to_int(cur_char) < base) { is_number = true; } bool has_overflow = false; size_t i = 0; for (; i < max_width && internal::isalnum(cur_char) && - b36_char_to_int(cur_char) < base; + internal::b36_char_to_int(cur_char) < base; ++i, cur_char = reader->getc()) { - uintmax_t cur_digit = b36_char_to_int(cur_char); + uintmax_t cur_digit = internal::b36_char_to_int(cur_char); if (result == MAX) { has_overflow = true; diff --git a/libc/src/stdio/scanf_core/ptr_converter.cpp b/libc/src/stdio/scanf_core/ptr_converter.cpp index 1a42a389d74b4..37f002d3da698 100644 --- a/libc/src/stdio/scanf_core/ptr_converter.cpp +++ b/libc/src/stdio/scanf_core/ptr_converter.cpp @@ -8,6 +8,7 @@ #include 
"src/stdio/scanf_core/ptr_converter.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" #include "src/stdio/scanf_core/converter_utils.h" #include "src/stdio/scanf_core/core_structs.h" @@ -24,7 +25,8 @@ int convert_pointer(Reader *reader, const FormatSection &to_conv) { // Check if it's exactly the nullptr string, if so then it's a nullptr. char cur_char = reader->getc(); size_t i = 0; - for (; i < sizeof(nullptr_string) && to_lower(cur_char) == nullptr_string[i]; + for (; i < (sizeof(nullptr_string) - 1) && + internal::tolower(cur_char) == nullptr_string[i]; ++i) { cur_char = reader->getc(); } diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 69a0dada23b4c..a4c4748380572 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -740,6 +740,7 @@ libc_support_library( hdrs = ["src/__support/integer_literals.h"], deps = [ ":__support_cpp_limits", + ":__support_ctype_utils", ":__support_uint128", ], ) @@ -4450,6 +4451,7 @@ libc_support_library( ":__support_cpp_limits", ":__support_cpp_span", ":__support_cpp_string_view", + ":__support_ctype_utils", ":__support_float_to_string", ":__support_fputil_fenv_impl", ":__support_fputil_fp_bits", From d9db3ace0b2312c228595955238798fb07b436e3 Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Wed, 20 Nov 2024 16:41:26 -0800 Subject: [PATCH 3/5] Fix everything else A bunch of string conversion/testing code needed to be updated. I'm gonna need to clean it up more later but that's gonna be a followup. Also I need to set up the ctype function in bazel. Blarg. 
--- libc/src/__support/high_precision_decimal.h | 10 +++-- libc/src/__support/integer_to_string.h | 7 +-- libc/src/__support/str_to_float.h | 2 +- libc/src/__support/str_to_integer.h | 10 +---- libc/src/ctype/toupper.cpp | 6 +-- libc/src/ctype/toupper_l.cpp | 4 +- .../stdio/printf_core/float_hex_converter.h | 3 -- libc/test/UnitTest/MemoryMatcher.cpp | 4 +- .../src/__support/CPP/stringview_test.cpp | 5 ++- libc/test/src/ctype/isalnum_test.cpp | 38 +++++++++++++--- libc/test/src/ctype/isalpha_test.cpp | 31 ++++++++++++- libc/test/src/ctype/isdigit_test.cpp | 31 +++++++++++-- libc/test/src/ctype/islower_test.cpp | 33 ++++++++++++-- libc/test/src/ctype/isupper_test.cpp | 33 ++++++++++++-- libc/test/src/ctype/isxdigit_test.cpp | 35 +++++++++++++-- libc/test/src/ctype/tolower_test.cpp | 44 +++++++++++++++++-- libc/test/src/ctype/toupper_test.cpp | 44 +++++++++++++++++-- libc/test/src/stdlib/StrtolTest.h | 27 ++++++------ libc/test/src/string/strcmp_test.cpp | 20 ++++----- .../llvm-project-overlay/libc/BUILD.bazel | 1 + .../libc/test/UnitTest/BUILD.bazel | 5 ++- 21 files changed, 312 insertions(+), 81 deletions(-) diff --git a/libc/src/__support/high_precision_decimal.h b/libc/src/__support/high_precision_decimal.h index ac11649d1d168..6f3d0470fdfbf 100644 --- a/libc/src/__support/high_precision_decimal.h +++ b/libc/src/__support/high_precision_decimal.h @@ -178,9 +178,11 @@ class HighPrecisionDecimal { if (digit_index >= this->num_digits) { return new_digits - 1; } - if (this->digits[digit_index] != power_of_five[digit_index] - '0') { + if (this->digits[digit_index] != + internal::b36_char_to_int(power_of_five[digit_index])) { return new_digits - - ((this->digits[digit_index] < power_of_five[digit_index] - '0') + ((this->digits[digit_index] < + internal::b36_char_to_int(power_of_five[digit_index])) ? 
1 : 0); } @@ -337,8 +339,8 @@ class HighPrecisionDecimal { } ++total_digits; if (this->num_digits < MAX_NUM_DIGITS) { - this->digits[this->num_digits] = - static_cast(num_string[num_cur] - '0'); + this->digits[this->num_digits] = static_cast( + internal::b36_char_to_int(num_string[num_cur])); ++this->num_digits; } else if (num_string[num_cur] != '0') { this->truncated = true; diff --git a/libc/src/__support/integer_to_string.h b/libc/src/__support/integer_to_string.h index 40d45a05ceadb..496060773d9a8 100644 --- a/libc/src/__support/integer_to_string.h +++ b/libc/src/__support/integer_to_string.h @@ -69,6 +69,7 @@ #include "src/__support/CPP/type_traits.h" #include "src/__support/big_int.h" // make_integral_or_big_int_unsigned_t #include "src/__support/common.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { @@ -214,9 +215,9 @@ template class IntegerToString { using UNSIGNED_T = make_integral_or_big_int_unsigned_t; LIBC_INLINE static char digit_char(uint8_t digit) { - if (digit < 10) - return '0' + static_cast(digit); - return (Fmt::IS_UPPERCASE ? 'A' : 'a') + static_cast(digit - 10); + const char result = static_cast(internal::int_to_b36_char(digit)); + return static_cast(Fmt::IS_UPPERCASE ? 
internal::toupper(result) + : result); } LIBC_INLINE static void diff --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h index 80ea334d15c03..b4d5646822df3 100644 --- a/libc/src/__support/str_to_float.h +++ b/libc/src/__support/str_to_float.h @@ -909,7 +909,7 @@ decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT, cpp::numeric_limits::max() / BASE; while (true) { if (isdigit(src[index])) { - uint32_t digit = src[index] - '0'; + uint32_t digit = b36_char_to_int(src[index]); seen_digit = true; if (mantissa < bitstype_max_div_by_base) { diff --git a/libc/src/__support/str_to_integer.h b/libc/src/__support/str_to_integer.h index 86611f9a6902d..8e569e8a7feb0 100644 --- a/libc/src/__support/str_to_integer.h +++ b/libc/src/__support/str_to_integer.h @@ -42,14 +42,6 @@ first_non_whitespace(const char *__restrict src, return src + src_cur; } -LIBC_INLINE int b36_char_to_int(char input) { - if (isdigit(input)) - return input - '0'; - if (isalpha(input)) - return (input | 32) + 10 - 'a'; - return 0; -} - // checks if the next 3 characters of the string pointer are the start of a // hexadecimal number. Does not advance the string pointer. 
LIBC_INLINE bool @@ -57,7 +49,7 @@ is_hex_start(const char *__restrict src, size_t src_len = cpp::numeric_limits::max()) { if (src_len < 3) return false; - return *src == '0' && (*(src + 1) | 32) == 'x' && isalnum(*(src + 2)) && + return *src == '0' && tolower(*(src + 1)) == 'x' && isalnum(*(src + 2)) && b36_char_to_int(*(src + 2)) < 16; } diff --git a/libc/src/ctype/toupper.cpp b/libc/src/ctype/toupper.cpp index b5a23fc7f588b..1e1e8fc400711 100644 --- a/libc/src/ctype/toupper.cpp +++ b/libc/src/ctype/toupper.cpp @@ -14,10 +14,6 @@ namespace LIBC_NAMESPACE_DECL { -LLVM_LIBC_FUNCTION(int, toupper, (int c)) { - if (internal::islower(c)) - return c - ('a' - 'A'); - return c; -} +LLVM_LIBC_FUNCTION(int, toupper, (int c)) { return internal::toupper(c); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/ctype/toupper_l.cpp b/libc/src/ctype/toupper_l.cpp index f536ff3623616..a435ca1ab5d41 100644 --- a/libc/src/ctype/toupper_l.cpp +++ b/libc/src/ctype/toupper_l.cpp @@ -15,9 +15,7 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, toupper_l, (int c, locale_t)) { - if (internal::islower(c)) - return c - ('a' - 'A'); - return c; + return internal::toupper(c); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/printf_core/float_hex_converter.h b/libc/src/stdio/printf_core/float_hex_converter.h index 5d9c42882a589..accca52d281aa 100644 --- a/libc/src/stdio/printf_core/float_hex_converter.h +++ b/libc/src/stdio/printf_core/float_hex_converter.h @@ -29,9 +29,6 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer, const FormatSection &to_conv) { using LDBits = fputil::FPBits; using StorageType = LDBits::StorageType; - // All of the letters will be defined relative to variable a, which will be - // the appropriate case based on the name of the conversion. This converts any - // conversion name into the letter 'a' with the appropriate case. 
bool is_negative; int exponent; diff --git a/libc/test/UnitTest/MemoryMatcher.cpp b/libc/test/UnitTest/MemoryMatcher.cpp index 244f25572c378..3cd5174fd7f75 100644 --- a/libc/test/UnitTest/MemoryMatcher.cpp +++ b/libc/test/UnitTest/MemoryMatcher.cpp @@ -8,6 +8,7 @@ #include "MemoryMatcher.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" #include "test/UnitTest/Test.h" @@ -40,7 +41,8 @@ bool MemoryMatcher::match(MemoryView actualValue) { static void display(char C) { const auto print = [](unsigned char I) { - tlog << static_cast(I < 10 ? '0' + I : 'A' + I - 10); + tlog << static_cast(LIBC_NAMESPACE::internal::toupper( + LIBC_NAMESPACE::internal::int_to_b36_char(I))); }; print(static_cast(C) / 16); print(static_cast(C) & 15); diff --git a/libc/test/src/__support/CPP/stringview_test.cpp b/libc/test/src/__support/CPP/stringview_test.cpp index 6b68f2a1c47a9..c9348243745a7 100644 --- a/libc/test/src/__support/CPP/stringview_test.cpp +++ b/libc/test/src/__support/CPP/stringview_test.cpp @@ -109,8 +109,6 @@ TEST(LlvmLibcStringViewTest, Observer) { ASSERT_EQ(ABC.back(), 'c'); } -bool isDigit(char c) { return c >= '0' && c <= '9'; } - TEST(LlvmLibcStringViewTest, FindFirstOf) { string_view Tmp("abca"); ASSERT_TRUE(Tmp.find_first_of('a') == 0); @@ -236,6 +234,9 @@ TEST(LlvmLibcStringViewTest, FindFirstNotOf) { TEST(LlvmLibcStringViewTest, Contains) { string_view Empty; + static_assert( + 'a' < 'z', + "This test only supports character encodings where 'a' is below 'z'"); for (char c = 'a'; c < 'z'; ++c) EXPECT_FALSE(Empty.contains(c)); diff --git a/libc/test/src/ctype/isalnum_test.cpp b/libc/test/src/ctype/isalnum_test.cpp index b71d36111d725..92915db4e4bcb 100644 --- a/libc/test/src/ctype/isalnum_test.cpp +++ b/libc/test/src/ctype/isalnum_test.cpp @@ -6,18 +6,46 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/CPP/span.h" #include "src/ctype/isalnum.h" #include 
"test/UnitTest/Test.h" +TEST(LlvmLibcIsAlNum, SimpleTest) { + EXPECT_NE(LIBC_NAMESPACE::isalnum('a'), 0); + EXPECT_NE(LIBC_NAMESPACE::isalnum('B'), 0); + EXPECT_NE(LIBC_NAMESPACE::isalnum('3'), 0); + + EXPECT_EQ(LIBC_NAMESPACE::isalnum(' '), 0); + EXPECT_EQ(LIBC_NAMESPACE::isalnum('?'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isalnum('\0'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isalnum(-1), 0); +} + +// TODO: Merge the ctype tests using this framework. +constexpr char ALNUM_ARRAY[] = { + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', +}; + +bool in_span(int ch, LIBC_NAMESPACE::cpp::span arr) { + for (size_t i = 0; i < arr.size(); ++i) { + if (static_cast(arr[i]) == ch) + return true; + } + return false; +} + TEST(LlvmLibcIsAlNum, DefaultLocale) { // Loops through all characters, verifying that numbers and letters // return non-zero integer and everything else returns a zero. 
- for (int c = -255; c < 255; ++c) { - if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || - ('0' <= c && c <= '9')) - EXPECT_NE(LIBC_NAMESPACE::isalnum(c), 0); + for (int ch = -255; ch < 255; ++ch) { + if (in_span(ch, ALNUM_ARRAY)) + EXPECT_NE(LIBC_NAMESPACE::isalnum(ch), 0); else - EXPECT_EQ(LIBC_NAMESPACE::isalnum(c), 0); + EXPECT_EQ(LIBC_NAMESPACE::isalnum(ch), 0); } } diff --git a/libc/test/src/ctype/isalpha_test.cpp b/libc/test/src/ctype/isalpha_test.cpp index 10cdb962ee2ee..ed4bc13969f59 100644 --- a/libc/test/src/ctype/isalpha_test.cpp +++ b/libc/test/src/ctype/isalpha_test.cpp @@ -6,15 +6,44 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/CPP/span.h" #include "src/ctype/isalpha.h" #include "test/UnitTest/Test.h" +TEST(LlvmLibcIsAlpha, SimpleTest) { + EXPECT_NE(LIBC_NAMESPACE::isalpha('a'), 0); + EXPECT_NE(LIBC_NAMESPACE::isalpha('B'), 0); + + EXPECT_EQ(LIBC_NAMESPACE::isalpha('3'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isalpha(' '), 0); + EXPECT_EQ(LIBC_NAMESPACE::isalpha('?'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isalpha('\0'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isalpha(-1), 0); +} + +// TODO: Merge the ctype tests using this framework. +constexpr char ALPHA_ARRAY[] = { + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', +}; + +bool in_span(int ch, LIBC_NAMESPACE::cpp::span arr) { + for (size_t i = 0; i < arr.size(); ++i) { + if (static_cast(arr[i]) == ch) + return true; + } + return false; +} + TEST(LlvmLibcIsAlpha, DefaultLocale) { // Loops through all characters, verifying that letters return a // non-zero integer and everything else returns zero. 
+ // TODO: encoding indep for (int ch = -255; ch < 255; ++ch) { - if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')) + if (in_span(ch, ALPHA_ARRAY)) EXPECT_NE(LIBC_NAMESPACE::isalpha(ch), 0); else EXPECT_EQ(LIBC_NAMESPACE::isalpha(ch), 0); diff --git a/libc/test/src/ctype/isdigit_test.cpp b/libc/test/src/ctype/isdigit_test.cpp index a9f84db3ef7e8..fc459ab506e49 100644 --- a/libc/test/src/ctype/isdigit_test.cpp +++ b/libc/test/src/ctype/isdigit_test.cpp @@ -6,15 +6,40 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/CPP/span.h" #include "src/ctype/isdigit.h" #include "test/UnitTest/Test.h" +TEST(LlvmLibcIsDigit, SimpleTest) { + EXPECT_NE(LIBC_NAMESPACE::isdigit('3'), 0); + + EXPECT_EQ(LIBC_NAMESPACE::isdigit('a'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isdigit('B'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isdigit(' '), 0); + EXPECT_EQ(LIBC_NAMESPACE::isdigit('?'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isdigit('\0'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isdigit(-1), 0); +} + +// TODO: Merge the ctype tests using this framework. +constexpr char DIGIT_ARRAY[] = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', +}; + +bool in_span(int ch, LIBC_NAMESPACE::cpp::span arr) { + for (size_t i = 0; i < arr.size(); ++i) { + if (static_cast(arr[i]) == ch) + return true; + } + return false; +} + TEST(LlvmLibcIsDigit, DefaultLocale) { - // Loops through all characters, verifying that numbers return a - // non-zero integer and everything else returns zero. + // Loops through all characters, verifying that digits return a + // non-zero integer and everything else returns zero.
for (int ch = -255; ch < 255; ++ch) { - if ('0' <= ch && ch <= '9') + if (in_span(ch, DIGIT_ARRAY)) EXPECT_NE(LIBC_NAMESPACE::isdigit(ch), 0); else EXPECT_EQ(LIBC_NAMESPACE::isdigit(ch), 0); diff --git a/libc/test/src/ctype/islower_test.cpp b/libc/test/src/ctype/islower_test.cpp index ba7caf65b6fd3..474feb09a4cf0 100644 --- a/libc/test/src/ctype/islower_test.cpp +++ b/libc/test/src/ctype/islower_test.cpp @@ -6,14 +6,41 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/CPP/span.h" #include "src/ctype/islower.h" + #include "test/UnitTest/Test.h" +TEST(LlvmLibcIsLower, SimpleTest) { + EXPECT_NE(LIBC_NAMESPACE::islower('a'), 0); + + EXPECT_EQ(LIBC_NAMESPACE::islower('B'), 0); + EXPECT_EQ(LIBC_NAMESPACE::islower('3'), 0); + EXPECT_EQ(LIBC_NAMESPACE::islower(' '), 0); + EXPECT_EQ(LIBC_NAMESPACE::islower('?'), 0); + EXPECT_EQ(LIBC_NAMESPACE::islower('\0'), 0); + EXPECT_EQ(LIBC_NAMESPACE::islower(-1), 0); +} + +// TODO: Merge the ctype tests using this framework. +constexpr char LOWER_ARRAY[] = { + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', +}; + +bool in_span(int ch, LIBC_NAMESPACE::cpp::span arr) { + for (size_t i = 0; i < arr.size(); ++i) { + if (static_cast(arr[i]) == ch) + return true; + } + return false; +} + TEST(LlvmLibcIsLower, DefaultLocale) { - // Loops through all characters, verifying that lowercase letters - // return a non-zero integer and everything else returns zero. + // Loops through all characters, verifying that lowercase letters + // return a non-zero integer and everything else returns zero.
for (int ch = -255; ch < 255; ++ch) { - if ('a' <= ch && ch <= 'z') + if (in_span(ch, LOWER_ARRAY)) EXPECT_NE(LIBC_NAMESPACE::islower(ch), 0); else EXPECT_EQ(LIBC_NAMESPACE::islower(ch), 0); diff --git a/libc/test/src/ctype/isupper_test.cpp b/libc/test/src/ctype/isupper_test.cpp index 05b2fd069ef06..667acc198f3ff 100644 --- a/libc/test/src/ctype/isupper_test.cpp +++ b/libc/test/src/ctype/isupper_test.cpp @@ -6,14 +6,41 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/CPP/span.h" #include "src/ctype/isupper.h" + #include "test/UnitTest/Test.h" +TEST(LlvmLibcIsUpper, SimpleTest) { + EXPECT_NE(LIBC_NAMESPACE::isupper('B'), 0); + + EXPECT_EQ(LIBC_NAMESPACE::isupper('a'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isupper('3'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isupper(' '), 0); + EXPECT_EQ(LIBC_NAMESPACE::isupper('?'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isupper('\0'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isupper(-1), 0); +} + +// TODO: Merge the ctype tests using this framework. +constexpr char UPPER_ARRAY[] = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', +}; + +bool in_span(int ch, LIBC_NAMESPACE::cpp::span arr) { + for (size_t i = 0; i < arr.size(); ++i) { + if (static_cast(arr[i]) == ch) + return true; + } + return false; +} + TEST(LlvmLibcIsUpper, DefaultLocale) { - // Loops through all characters, verifying that uppercase letters - // return a non-zero integer and everything else returns zero. + // Loops through all characters, verifying that uppercase letters + // return a non-zero integer and everything else returns zero.
for (int ch = -255; ch < 255; ++ch) { - if ('A' <= ch && ch <= 'Z') + if (in_span(ch, UPPER_ARRAY)) EXPECT_NE(LIBC_NAMESPACE::isupper(ch), 0); else EXPECT_EQ(LIBC_NAMESPACE::isupper(ch), 0); diff --git a/libc/test/src/ctype/isxdigit_test.cpp b/libc/test/src/ctype/isxdigit_test.cpp index b8f27a968540c..29d27a0e497fb 100644 --- a/libc/test/src/ctype/isxdigit_test.cpp +++ b/libc/test/src/ctype/isxdigit_test.cpp @@ -6,13 +6,42 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/CPP/span.h" #include "src/ctype/isxdigit.h" + #include "test/UnitTest/Test.h" -TEST(LlvmLibcIsXDigit, DefaultLocale) { +TEST(LlvmLibcIsXdigit, SimpleTest) { + EXPECT_NE(LIBC_NAMESPACE::isxdigit('a'), 0); + EXPECT_NE(LIBC_NAMESPACE::isxdigit('B'), 0); + EXPECT_NE(LIBC_NAMESPACE::isxdigit('3'), 0); + + EXPECT_EQ(LIBC_NAMESPACE::isxdigit('z'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isxdigit(' '), 0); + EXPECT_EQ(LIBC_NAMESPACE::isxdigit('?'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isxdigit('\0'), 0); + EXPECT_EQ(LIBC_NAMESPACE::isxdigit(-1), 0); +} + +// TODO: Merge the ctype tests using this framework. +constexpr char XDIGIT_ARRAY[] = { + 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', + 'F', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', +}; + +bool in_span(int ch, LIBC_NAMESPACE::cpp::span arr) { + for (size_t i = 0; i < arr.size(); ++i) { + if (static_cast(arr[i]) == ch) + return true; + } + return false; +} + +TEST(LlvmLibcIsXdigit, DefaultLocale) { + // Loops through all characters, verifying that hexadecimal digits + // return a non-zero integer and everything else returns zero.
for (int ch = -255; ch < 255; ++ch) { - if (('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || - ('A' <= ch && ch <= 'F')) + if (in_span(ch, XDIGIT_ARRAY)) EXPECT_NE(LIBC_NAMESPACE::isxdigit(ch), 0); else EXPECT_EQ(LIBC_NAMESPACE::isxdigit(ch), 0); diff --git a/libc/test/src/ctype/tolower_test.cpp b/libc/test/src/ctype/tolower_test.cpp index 3770ce4ea68b6..4c5ef9543be27 100644 --- a/libc/test/src/ctype/tolower_test.cpp +++ b/libc/test/src/ctype/tolower_test.cpp @@ -6,14 +6,52 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/CPP/span.h" #include "src/ctype/tolower.h" + #include "test/UnitTest/Test.h" +TEST(LlvmLibcToLower, SimpleTest) { + EXPECT_EQ(LIBC_NAMESPACE::tolower('a'), int('a')); + EXPECT_EQ(LIBC_NAMESPACE::tolower('B'), int('b')); + EXPECT_EQ(LIBC_NAMESPACE::tolower('3'), int('3')); + + EXPECT_EQ(LIBC_NAMESPACE::tolower(' '), int(' ')); + EXPECT_EQ(LIBC_NAMESPACE::tolower('?'), int('?')); + EXPECT_EQ(LIBC_NAMESPACE::tolower('\0'), int('\0')); + EXPECT_EQ(LIBC_NAMESPACE::tolower(-1), int(-1)); +} + +// TODO: Merge the ctype tests using this framework. +// Invariant: UPPER_ARR and LOWER_ARR are both the complete alphabet in the same +// order. 
+constexpr char UPPER_ARR[] = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', +}; +constexpr char LOWER_ARR[] = { + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', +}; + +static_assert( + sizeof(UPPER_ARR) == sizeof(LOWER_ARR), + "There must be the same number of uppercase and lowercase letters."); + +int span_index(int ch, LIBC_NAMESPACE::cpp::span arr) { + for (size_t i = 0; i < arr.size(); ++i) { + if (static_cast(arr[i]) == ch) + return static_cast(i); + } + return -1; +} + TEST(LlvmLibcToLower, DefaultLocale) { for (int ch = -255; ch < 255; ++ch) { - // This follows pattern 'A' + 32 = 'a'. - if ('A' <= ch && ch <= 'Z') - EXPECT_EQ(LIBC_NAMESPACE::tolower(ch), ch + 32); + int char_index = span_index(ch, UPPER_ARR); + if (char_index != -1) + EXPECT_EQ(LIBC_NAMESPACE::tolower(ch), + static_cast(LOWER_ARR[char_index])); else EXPECT_EQ(LIBC_NAMESPACE::tolower(ch), ch); } diff --git a/libc/test/src/ctype/toupper_test.cpp b/libc/test/src/ctype/toupper_test.cpp index 0413b43fb6009..7346f31824a55 100644 --- a/libc/test/src/ctype/toupper_test.cpp +++ b/libc/test/src/ctype/toupper_test.cpp @@ -6,14 +6,52 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/CPP/span.h" #include "src/ctype/toupper.h" + #include "test/UnitTest/Test.h" +TEST(LlvmLibcToUpper, SimpleTest) { + EXPECT_EQ(LIBC_NAMESPACE::toupper('a'), int('A')); + EXPECT_EQ(LIBC_NAMESPACE::toupper('B'), int('B')); + EXPECT_EQ(LIBC_NAMESPACE::toupper('3'), int('3')); + + EXPECT_EQ(LIBC_NAMESPACE::toupper(' '), int(' ')); + EXPECT_EQ(LIBC_NAMESPACE::toupper('?'), int('?')); + EXPECT_EQ(LIBC_NAMESPACE::toupper('\0'), int('\0')); + EXPECT_EQ(LIBC_NAMESPACE::toupper(-1), int(-1)); +} + +// TODO: Merge the ctype tests using this framework. 
+// Invariant: UPPER_ARR and LOWER_ARR are both the complete alphabet in the same +// order. +constexpr char UPPER_ARR[] = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', +}; +constexpr char LOWER_ARR[] = { + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', +}; + +static_assert( + sizeof(UPPER_ARR) == sizeof(LOWER_ARR), + "There must be the same number of uppercase and lowercase letters."); + +int span_index(int ch, LIBC_NAMESPACE::cpp::span arr) { + for (size_t i = 0; i < arr.size(); ++i) { + if (static_cast(arr[i]) == ch) + return static_cast(i); + } + return -1; +} + TEST(LlvmLibcToUpper, DefaultLocale) { for (int ch = -255; ch < 255; ++ch) { - // This follows pattern 'a' - 32 = 'A'. - if ('a' <= ch && ch <= 'z') - EXPECT_EQ(LIBC_NAMESPACE::toupper(ch), ch - 32); + int char_index = span_index(ch, LOWER_ARR); + if (char_index != -1) + EXPECT_EQ(LIBC_NAMESPACE::toupper(ch), + static_cast(UPPER_ARR[char_index])); else EXPECT_EQ(LIBC_NAMESPACE::toupper(ch), ch); } diff --git a/libc/test/src/stdlib/StrtolTest.h b/libc/test/src/stdlib/StrtolTest.h index 8a67848e4c330..6cfaddcbedeb6 100644 --- a/libc/test/src/stdlib/StrtolTest.h +++ b/libc/test/src/stdlib/StrtolTest.h @@ -8,6 +8,7 @@ #include "src/__support/CPP/limits.h" #include "src/__support/CPP/type_traits.h" +#include "src/__support/ctype_utils.h" #include "src/__support/macros/properties/architectures.h" #include "src/errno/libc_errno.h" #include "test/UnitTest/Test.h" @@ -16,14 +17,6 @@ using LIBC_NAMESPACE::cpp::is_signed_v; -static inline char int_to_b36_char(int input) { - if (input < 0 || input > 36) - return '0'; - if (input < 10) - return static_cast('0' + input); - return static_cast('A' + input - 10); -} - template struct StrtoTest : public LIBC_NAMESPACE::testing::Test { using FunctionT = ReturnT (*)(const char *, char **, int); @@ 
-207,7 +200,8 @@ struct StrtoTest : public LIBC_NAMESPACE::testing::Test { char small_string[4] = {'\0', '\0', '\0', '\0'}; for (int base = 2; base <= 36; ++base) { for (int first_digit = 0; first_digit <= 36; ++first_digit) { - small_string[0] = int_to_b36_char(first_digit); + small_string[0] = + LIBC_NAMESPACE::internal::int_to_b36_char(first_digit); if (first_digit < base) { LIBC_NAMESPACE::libc_errno = 0; ASSERT_EQ(func(small_string, nullptr, base), @@ -223,9 +217,11 @@ struct StrtoTest : public LIBC_NAMESPACE::testing::Test { for (int base = 2; base <= 36; ++base) { for (int first_digit = 0; first_digit <= 36; ++first_digit) { - small_string[0] = int_to_b36_char(first_digit); + small_string[0] = + LIBC_NAMESPACE::internal::int_to_b36_char(first_digit); for (int second_digit = 0; second_digit <= 36; ++second_digit) { - small_string[1] = int_to_b36_char(second_digit); + small_string[1] = + LIBC_NAMESPACE::internal::int_to_b36_char(second_digit); if (first_digit < base && second_digit < base) { LIBC_NAMESPACE::libc_errno = 0; ASSERT_EQ( @@ -248,11 +244,14 @@ struct StrtoTest : public LIBC_NAMESPACE::testing::Test { for (int base = 2; base <= 36; ++base) { for (int first_digit = 0; first_digit <= 36; ++first_digit) { - small_string[0] = int_to_b36_char(first_digit); + small_string[0] = + LIBC_NAMESPACE::internal::int_to_b36_char(first_digit); for (int second_digit = 0; second_digit <= 36; ++second_digit) { - small_string[1] = int_to_b36_char(second_digit); + small_string[1] = + LIBC_NAMESPACE::internal::int_to_b36_char(second_digit); for (int third_digit = 0; third_digit <= limit; ++third_digit) { - small_string[2] = int_to_b36_char(third_digit); + small_string[2] = + LIBC_NAMESPACE::internal::int_to_b36_char(third_digit); if (first_digit < base && second_digit < base && third_digit < base) { diff --git a/libc/test/src/string/strcmp_test.cpp b/libc/test/src/string/strcmp_test.cpp index ef58dc608c83b..234447610222f 100644 --- a/libc/test/src/string/strcmp_test.cpp 
+++ b/libc/test/src/string/strcmp_test.cpp @@ -25,13 +25,13 @@ TEST(LlvmLibcStrCmpTest, EmptyStringShouldNotEqualNonEmptyString) { const char *s2 = "abc"; int result = LIBC_NAMESPACE::strcmp(empty, s2); // This should be '\0' - 'a' = -97 - ASSERT_EQ(result, -97); + ASSERT_EQ(result, '\0' - 'a'); // Similar case if empty string is second argument. const char *s3 = "123"; result = LIBC_NAMESPACE::strcmp(s3, empty); // This should be '1' - '\0' = 49 - ASSERT_EQ(result, 49); + ASSERT_EQ(result, '1' - '\0'); } TEST(LlvmLibcStrCmpTest, EqualStringsShouldReturnZero) { @@ -50,12 +50,12 @@ TEST(LlvmLibcStrCmpTest, ShouldReturnResultOfFirstDifference) { const char *s2 = "___C55__"; int result = LIBC_NAMESPACE::strcmp(s1, s2); // This should return 'B' - 'C' = -1. - ASSERT_EQ(result, -1); + ASSERT_EQ(result, 'B' - 'C'); // Verify operands reversed. result = LIBC_NAMESPACE::strcmp(s2, s1); // This should return 'C' - 'B' = 1. - ASSERT_EQ(result, 1); + ASSERT_EQ(result, 'C' - 'B'); } TEST(LlvmLibcStrCmpTest, CapitalizedLetterShouldNotBeEqual) { @@ -63,12 +63,12 @@ TEST(LlvmLibcStrCmpTest, CapitalizedLetterShouldNotBeEqual) { const char *s2 = "abCd"; int result = LIBC_NAMESPACE::strcmp(s1, s2); // 'c' - 'C' = 32. - ASSERT_EQ(result, 32); + ASSERT_EQ(result, 'c' - 'C'); // Verify operands reversed. result = LIBC_NAMESPACE::strcmp(s2, s1); // 'C' - 'c' = -32. - ASSERT_EQ(result, -32); + ASSERT_EQ(result, 'C' - 'c'); } TEST(LlvmLibcStrCmpTest, UnequalLengthStringsShouldNotReturnZero) { @@ -76,12 +76,12 @@ TEST(LlvmLibcStrCmpTest, UnequalLengthStringsShouldNotReturnZero) { const char *s2 = "abcd"; int result = LIBC_NAMESPACE::strcmp(s1, s2); // '\0' - 'd' = -100. - ASSERT_EQ(result, -100); + ASSERT_EQ(result, -'\0' - 'd'); // Verify operands reversed. result = LIBC_NAMESPACE::strcmp(s2, s1); // 'd' - '\0' = 100. 
- ASSERT_EQ(result, 100); + ASSERT_EQ(result, 'd' - '\0'); } TEST(LlvmLibcStrCmpTest, StringArgumentSwapChangesSign) { @@ -89,11 +89,11 @@ TEST(LlvmLibcStrCmpTest, StringArgumentSwapChangesSign) { const char *b = "b"; int result = LIBC_NAMESPACE::strcmp(b, a); // 'b' - 'a' = 1. - ASSERT_EQ(result, 1); + ASSERT_EQ(result, 'b' - 'a'); result = LIBC_NAMESPACE::strcmp(a, b); // 'a' - 'b' = -1. - ASSERT_EQ(result, -1); + ASSERT_EQ(result, 'a' - 'b'); } TEST(LlvmLibcStrCmpTest, Case) { diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index a4c4748380572..fd790fbdc6e40 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -773,6 +773,7 @@ libc_support_library( ":__support_cpp_span", ":__support_cpp_string_view", ":__support_cpp_type_traits", + ":__support_ctype_utils", ], ) diff --git a/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel index f387741e95d8b..6db3456edbb70 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel @@ -63,12 +63,12 @@ libc_support_library( "//libc:__support_stringutil", "//libc:__support_uint128", "//libc:errno", - "//libc:llvm_libc_macros_stdfix_macros", - "//llvm:Support", "//libc:func_aligned_alloc", "//libc:func_free", "//libc:func_malloc", "//libc:func_realloc", + "//libc:llvm_libc_macros_stdfix_macros", + "//llvm:Support", ], ) @@ -121,6 +121,7 @@ libc_support_library( "//libc:__support_cpp_bitset", "//libc:__support_cpp_span", "//libc:__support_cpp_type_traits", + "//libc:__support_ctype_utils", "//libc:__support_macros_config", ], ) From 8d25d9079f7305e483a293e4cf3f1bd1ef140dc3 Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Tue, 3 Dec 2024 11:24:08 -0800 Subject: [PATCH 4/5] address comments --- libc/src/__support/ctype_utils.h | 11 
+++++++++++ libc/src/stdio/printf_core/float_hex_converter.h | 3 +-- libc/test/src/ctype/isalnum_test.cpp | 3 +-- libc/test/src/ctype/isalpha_test.cpp | 3 +-- libc/test/src/ctype/isdigit_test.cpp | 3 +-- libc/test/src/ctype/islower_test.cpp | 3 +-- libc/test/src/ctype/isupper_test.cpp | 3 +-- libc/test/src/ctype/isxdigit_test.cpp | 3 +-- libc/test/src/ctype/tolower_test.cpp | 3 +-- libc/test/src/ctype/toupper_test.cpp | 3 +-- 10 files changed, 20 insertions(+), 18 deletions(-) diff --git a/libc/src/__support/ctype_utils.h b/libc/src/__support/ctype_utils.h index 8521857ce765d..be0f25330af9e 100644 --- a/libc/src/__support/ctype_utils.h +++ b/libc/src/__support/ctype_utils.h @@ -26,6 +26,17 @@ namespace internal { // functions, make sure you have benchmarks to show your new solution is faster, // as well as a way to support non-ASCII character encodings. +// Similarly, do not change these functions to use case ranges. e.g. +// bool islower(int ch) { +// switch(ch) { +// case 'a'...'z': +// return true; +// } +// } +// This assumes the character ranges are contiguous, which they aren't in +// EBCDIC. Technically we could use some smaller ranges, but that's even harder +// to read. 
+ LIBC_INLINE static constexpr bool islower(int ch) { switch (ch) { case 'a': diff --git a/libc/src/stdio/printf_core/float_hex_converter.h b/libc/src/stdio/printf_core/float_hex_converter.h index accca52d281aa..b264b5cf20728 100644 --- a/libc/src/stdio/printf_core/float_hex_converter.h +++ b/libc/src/stdio/printf_core/float_hex_converter.h @@ -137,9 +137,8 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer, for (; mant_cur > 0; --mant_cur, mantissa >>= 4) { char mant_mod_16 = static_cast(mantissa % 16); char new_digit = static_cast(internal::int_to_b36_char(mant_mod_16)); - if (internal::isupper(to_conv.conv_name)) { + if (internal::isupper(to_conv.conv_name)) new_digit = static_cast(internal::toupper(new_digit)); - } mant_buffer[mant_cur - 1] = new_digit; if (new_digit != '0' && first_non_zero < mant_cur) first_non_zero = mant_cur; diff --git a/libc/test/src/ctype/isalnum_test.cpp b/libc/test/src/ctype/isalnum_test.cpp index 92915db4e4bcb..18ddd2b14b8c8 100644 --- a/libc/test/src/ctype/isalnum_test.cpp +++ b/libc/test/src/ctype/isalnum_test.cpp @@ -32,10 +32,9 @@ constexpr char ALNUM_ARRAY[] = { }; bool in_span(int ch, LIBC_NAMESPACE::cpp::span arr) { - for (size_t i = 0; i < arr.size(); ++i) { + for (size_t i = 0; i < arr.size(); ++i) if (static_cast(arr[i]) == ch) return true; - } return false; } diff --git a/libc/test/src/ctype/isalpha_test.cpp b/libc/test/src/ctype/isalpha_test.cpp index ed4bc13969f59..e54b580dbe264 100644 --- a/libc/test/src/ctype/isalpha_test.cpp +++ b/libc/test/src/ctype/isalpha_test.cpp @@ -31,10 +31,9 @@ constexpr char ALPHA_ARRAY[] = { }; bool in_span(int ch, LIBC_NAMESPACE::cpp::span arr) { - for (size_t i = 0; i < arr.size(); ++i) { + for (size_t i = 0; i < arr.size(); ++i) if (static_cast(arr[i]) == ch) return true; - } return false; } diff --git a/libc/test/src/ctype/isdigit_test.cpp b/libc/test/src/ctype/isdigit_test.cpp index fc459ab506e49..adea55e59c74d 100644 --- a/libc/test/src/ctype/isdigit_test.cpp +++ 
b/libc/test/src/ctype/isdigit_test.cpp @@ -28,10 +28,9 @@ constexpr char DIGIT_ARRAY[] = { }; bool in_span(int ch, LIBC_NAMESPACE::cpp::span arr) { - for (size_t i = 0; i < arr.size(); ++i) { + for (size_t i = 0; i < arr.size(); ++i) if (static_cast(arr[i]) == ch) return true; - } return false; } diff --git a/libc/test/src/ctype/islower_test.cpp b/libc/test/src/ctype/islower_test.cpp index 474feb09a4cf0..f9414bd8cbd09 100644 --- a/libc/test/src/ctype/islower_test.cpp +++ b/libc/test/src/ctype/islower_test.cpp @@ -29,10 +29,9 @@ constexpr char LOWER_ARRAY[] = { }; bool in_span(int ch, LIBC_NAMESPACE::cpp::span arr) { - for (size_t i = 0; i < arr.size(); ++i) { + for (size_t i = 0; i < arr.size(); ++i) if (static_cast(arr[i]) == ch) return true; - } return false; } diff --git a/libc/test/src/ctype/isupper_test.cpp b/libc/test/src/ctype/isupper_test.cpp index 667acc198f3ff..94def1a9dcccd 100644 --- a/libc/test/src/ctype/isupper_test.cpp +++ b/libc/test/src/ctype/isupper_test.cpp @@ -29,10 +29,9 @@ constexpr char UPPER_ARRAY[] = { }; bool in_span(int ch, LIBC_NAMESPACE::cpp::span arr) { - for (size_t i = 0; i < arr.size(); ++i) { + for (size_t i = 0; i < arr.size(); ++i) if (static_cast(arr[i]) == ch) return true; - } return false; } diff --git a/libc/test/src/ctype/isxdigit_test.cpp b/libc/test/src/ctype/isxdigit_test.cpp index 29d27a0e497fb..d7253d549907b 100644 --- a/libc/test/src/ctype/isxdigit_test.cpp +++ b/libc/test/src/ctype/isxdigit_test.cpp @@ -30,10 +30,9 @@ constexpr char XDIGIT_ARRAY[] = { }; bool in_span(int ch, LIBC_NAMESPACE::cpp::span arr) { - for (size_t i = 0; i < arr.size(); ++i) { + for (size_t i = 0; i < arr.size(); ++i) if (static_cast(arr[i]) == ch) return true; - } return false; } diff --git a/libc/test/src/ctype/tolower_test.cpp b/libc/test/src/ctype/tolower_test.cpp index 4c5ef9543be27..59432c43297b3 100644 --- a/libc/test/src/ctype/tolower_test.cpp +++ b/libc/test/src/ctype/tolower_test.cpp @@ -39,10 +39,9 @@ static_assert( "There must be 
the same number of uppercase and lowercase letters."); int span_index(int ch, LIBC_NAMESPACE::cpp::span arr) { - for (size_t i = 0; i < arr.size(); ++i) { + for (size_t i = 0; i < arr.size(); ++i) if (static_cast(arr[i]) == ch) return static_cast(i); - } return -1; } diff --git a/libc/test/src/ctype/toupper_test.cpp b/libc/test/src/ctype/toupper_test.cpp index 7346f31824a55..045b00bbb4b93 100644 --- a/libc/test/src/ctype/toupper_test.cpp +++ b/libc/test/src/ctype/toupper_test.cpp @@ -39,10 +39,9 @@ static_assert( "There must be the same number of uppercase and lowercase letters."); int span_index(int ch, LIBC_NAMESPACE::cpp::span arr) { - for (size_t i = 0; i < arr.size(); ++i) { + for (size_t i = 0; i < arr.size(); ++i) if (static_cast(arr[i]) == ch) return static_cast(i); - } return -1; } From c3e76f8531dfd349811b4c266bccbfa5080405ea Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Tue, 3 Dec 2024 11:37:32 -0800 Subject: [PATCH 5/5] accept another diff --- libc/src/__support/integer_to_string.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/src/__support/integer_to_string.h b/libc/src/__support/integer_to_string.h index 496060773d9a8..ea620087584cb 100644 --- a/libc/src/__support/integer_to_string.h +++ b/libc/src/__support/integer_to_string.h @@ -215,7 +215,7 @@ template class IntegerToString { using UNSIGNED_T = make_integral_or_big_int_unsigned_t; LIBC_INLINE static char digit_char(uint8_t digit) { - const char result = static_cast(internal::int_to_b36_char(digit)); + const int result = internal::int_to_b36_char(digit); return static_cast(Fmt::IS_UPPERCASE ? internal::toupper(result) : result); }