From a484c4b859cc92c1912072a3e5167e708a90d093 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Wed, 25 Jun 2025 23:27:07 +0000 Subject: [PATCH 1/5] set up build files --- libc/config/linux/x86_64/entrypoints.txt | 1 + libc/include/wchar.yaml | 8 ++++++++ libc/src/wchar/CMakeLists.txt | 10 ++++++++++ libc/src/wchar/wcstok.cpp | 22 ++++++++++++++++++++++ libc/src/wchar/wcstok.h | 22 ++++++++++++++++++++++ 5 files changed, 63 insertions(+) create mode 100644 libc/src/wchar/wcstok.cpp create mode 100644 libc/src/wchar/wcstok.h diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 6b3fc9485ec1a..bf04ae2e83fb3 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -386,6 +386,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.wchar.wmemchr libc.src.wchar.wcpcpy libc.src.wchar.wcpncpy + libc.src.wchar.wcstok # sys/uio.h entrypoints libc.src.sys.uio.writev diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml index 397296894829d..15025f42c0723 100644 --- a/libc/include/wchar.yaml +++ b/libc/include/wchar.yaml @@ -189,6 +189,14 @@ functions: arguments: - type: wchar_t *__restrict - type: const wchar_t *__restrict + - name: wcstok + standards: + - stdc + return_type: wchar_t * + arguments: + - type: wchar_t *__restrict + - type: const wchar_t *__restrict + - type: wchar_t** __restrict - name: wcpcpy standards: - stdc diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt index 16664100d42c7..6d93b82b2d2bf 100644 --- a/libc/src/wchar/CMakeLists.txt +++ b/libc/src/wchar/CMakeLists.txt @@ -34,6 +34,16 @@ add_entrypoint_object( libc.src.__support.wctype_utils ) +add_entrypoint_object( + wcstok + SRCS + wcstok.cpp + HDRS + wcstok.h + DEPENDS + libc.hdr.types.wchar_t +) + add_entrypoint_object( wcrtomb SRCS diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp new file mode 100644 index 0000000000000..dc004c0a7af37 --- /dev/null +++ b/libc/src/wchar/wcstok.cpp @@ -0,0 +1,22 @@ +//===-- Implementation of wcstok ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/wchar/wcstok.h" + +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(wchar_t *, wcstok, + (wchar_t *__restrict str, const wchar_t *__restrict delim, + wchar_t **__restrict ptr)) { + +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/wcstok.h b/libc/src/wchar/wcstok.h new file mode 100644 index 0000000000000..9f41ea37a947a --- /dev/null +++ b/libc/src/wchar/wcstok.h @@ -0,0 +1,22 @@ +//===-- Implementation header for wcstok ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_WCHAR_WCSTOK_H +#define LLVM_LIBC_SRC_WCHAR_WCSTOK_H + +#include "hdr/types/wchar_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +wchar_t *wcstok(wchar_t *__restrict str, const wchar_t *__restrict delim, + wchar_t **__restrict ptr); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_WCHAR_WCSTOK_H From 930e37e5de03c3879d5794b19219e211a7d8a469 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Thu, 26 Jun 2025 21:29:47 +0000 Subject: [PATCH 2/5] started impl --- libc/src/wchar/wcstok.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp index dc004c0a7af37..c6ad51af25978 100644 --- a/libc/src/wchar/wcstok.cpp +++ b/libc/src/wchar/wcstok.cpp @@ -16,7 +16,15 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(wchar_t *, wcstok, (wchar_t *__restrict str, const wchar_t *__restrict delim, wchar_t **__restrict ptr)) { - + if (str == nullptr) + str = *ptr; + + while (*str != L'\0') { + bool inDelim = false; + for (const wchar_t* delim_ptr = delim; delim_ptr != L'\0'; delim_ptr++) { + + } + } } } // namespace LIBC_NAMESPACE_DECL From 17f33604af3b7680dabad1fc8a94c2ff88237f09 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Thu, 26 Jun 2025 23:13:46 +0000 Subject: [PATCH 3/5] tests --- libc/src/wchar/wcstok.cpp | 32 ++++-- libc/test/src/wchar/CMakeLists.txt | 10 ++ libc/test/src/wchar/wcstok_test.cpp | 145 ++++++++++++++++++++++++++++ 3 files changed, 179 insertions(+), 8 deletions(-) create mode 100644 libc/test/src/wchar/wcstok_test.cpp diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp index c6ad51af25978..a02332ac1fbbd 100644 --- a/libc/src/wchar/wcstok.cpp +++ b/libc/src/wchar/wcstok.cpp @@ -16,15 +16,31 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(wchar_t *, wcstok, (wchar_t *__restrict str, const wchar_t *__restrict delim, wchar_t **__restrict ptr)) { - if (str == nullptr) - str = *ptr; - - while (*str != L'\0') { - bool inDelim = false; - for (const wchar_t* delim_ptr = delim; delim_ptr != L'\0'; delim_ptr++) { - - } + if (str == nullptr) + str = *ptr; + + bool foundTokenStart = false; + wchar_t *out = nullptr; + wchar_t *str_ptr; + for (str_ptr = str; *str_ptr != L'\0'; str_ptr++) { + bool inDelim = false; + for (const wchar_t *delim_ptr = delim; *delim_ptr != L'\0' && !inDelim; + delim_ptr++) + if (*str_ptr == *delim_ptr) + inDelim = true; + + if (!inDelim && !foundTokenStart) { + foundTokenStart = true; + out = str_ptr; + } else if (inDelim && foundTokenStart) { + *str_ptr = L'\0'; + *ptr = str_ptr + 1; + return out; } + } + + *ptr = str_ptr; + return out; } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt index bf16fdd7f8c4d..8967cc1e8d353 100644 --- a/libc/test/src/wchar/CMakeLists.txt +++ b/libc/test/src/wchar/CMakeLists.txt @@ -111,6 +111,16 @@ add_libc_test( libc.src.wchar.wcschr ) +add_libc_test( + wcstok_test + SUITE + libc_wchar_unittests + SRCS + wcstok_test.cpp + DEPENDS + libc.src.wchar.wcstok +) + add_libc_test( wcsncmp_test SUITE diff --git a/libc/test/src/wchar/wcstok_test.cpp b/libc/test/src/wchar/wcstok_test.cpp new file mode 100644 index 0000000000000..79153ccc0adad --- /dev/null +++ b/libc/test/src/wchar/wcstok_test.cpp @@ -0,0 +1,145 @@ +//===-- Unittests for wcstok ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/wchar/wcstok.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcStrTokTest, NoTokenFound) { + wchar_t empty[] = L""; + wchar_t *buf; + ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &buf), nullptr); + ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &buf), nullptr); + + wchar_t single[] = L"_"; + wchar_t *token = LIBC_NAMESPACE::wcstok(single, L"", &buf); + ASSERT_TRUE(token[0] == L'_'); + ASSERT_TRUE(token[1] == L'\0'); + + wchar_t multiple[] = L"1,2"; + token = LIBC_NAMESPACE::wcstok(multiple, L":", &buf); + ASSERT_TRUE(multiple[0] == L'1'); + ASSERT_TRUE(multiple[1] == L','); + ASSERT_TRUE(multiple[2] == L'2'); + ASSERT_TRUE(multiple[3] == L'\0'); +} + +TEST(LlvmLibcStrTokTest, DelimiterAsFirstCharacterShouldBeIgnored) { + wchar_t *buf; + wchar_t src[] = L".123"; + wchar_t *token = LIBC_NAMESPACE::wcstok(src, L".", &buf); + ASSERT_TRUE(token[0] == L'1'); + ASSERT_TRUE(token[1] == L'2'); + ASSERT_TRUE(token[2] == L'3'); + ASSERT_TRUE(token[3] == L'\0'); +} + +TEST(LlvmLibcStrTokTest, DelimiterIsMiddleCharacter) { + wchar_t src[] = L"12,34"; + wchar_t *buf; + wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",", &buf); + ASSERT_TRUE(token[0] == L'1'); + ASSERT_TRUE(token[1] == L'2'); + ASSERT_TRUE(token[2] == L'\0'); +} + +TEST(LlvmLibcStrTokTest, DelimiterAsLastCharacterShouldBeIgnored) { + wchar_t src[] = L"1234:"; + wchar_t *buf; + wchar_t *token = LIBC_NAMESPACE::wcstok(src, L":", &buf); + ASSERT_TRUE(token[0] == L'1'); + ASSERT_TRUE(token[1] == L'2'); + ASSERT_TRUE(token[2] == L'3'); + ASSERT_TRUE(token[3] == L'4'); + ASSERT_TRUE(token[4] == L'\0'); +} + +TEST(LlvmLibcStrTokTest, MultipleDelimiters) { + wchar_t src[] = L"12,.34"; + wchar_t *buf; + wchar_t *token; + + token = LIBC_NAMESPACE::wcstok(src, L".", &buf); + ASSERT_TRUE(token[0] == L'1'); + ASSERT_TRUE(token[1] == L'2'); + ASSERT_TRUE(token[2] == L','); + ASSERT_TRUE(token[3] == L'\0'); + + token = LIBC_NAMESPACE::wcstok(src, L".,", &buf); + ASSERT_TRUE(token[0] == L'1'); + ASSERT_TRUE(token[1] == L'2'); + ASSERT_TRUE(token[2] == L'\0'); + + token = LIBC_NAMESPACE::wcstok(src, L",.", &buf); + ASSERT_TRUE(token[0] == L'1'); + ASSERT_TRUE(token[1] == L'2'); + ASSERT_TRUE(token[2] == L'\0'); + + token = LIBC_NAMESPACE::wcstok(src, L":,.", &buf); + ASSERT_TRUE(token[0] == L'1'); + ASSERT_TRUE(token[1] == L'2'); + ASSERT_TRUE(token[2] == L'\0'); +} + +TEST(LlvmLibcStrTokTest, ShouldNotGoPastNullTerminator) { + wchar_t src[] = {L'1', L'2', L'\0', L',', L'3'}; + wchar_t *buf; + wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",", &buf); + ASSERT_TRUE(token[0] == L'1'); + ASSERT_TRUE(token[1] == L'2'); + ASSERT_TRUE(token[2] == L'\0'); +} + +TEST(LlvmLibcStrTokTest, SubsequentCallsShouldFindFollowingDelimiters) { + wchar_t src[] = L"12,34.56"; + wchar_t *buf; + wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",.", &buf); + ASSERT_TRUE(token[0] == L'1'); + ASSERT_TRUE(token[1] == L'2'); + ASSERT_TRUE(token[2] == L'\0'); + + token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &buf); + ASSERT_TRUE(token[0] == L'3'); + ASSERT_TRUE(token[1] == L'4'); + ASSERT_TRUE(token[2] == L'\0'); + + token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &buf); + ASSERT_TRUE(token[0] == L'5'); + ASSERT_TRUE(token[1] == L'6'); + ASSERT_TRUE(token[2] == L'\0'); + + token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &buf); + ASSERT_EQ(token, nullptr); + // Subsequent calls after hitting the end of the string should also return + // nullptr. + token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &buf); + ASSERT_EQ(token, nullptr); +} + +TEST(LlvmLibcStrTokTest, DelimitersShouldNotBeIncludedInToken) { + wchar_t *buf; + wchar_t src[] = L"__ab__:_cd__:__ef__:__"; + wchar_t *token = LIBC_NAMESPACE::wcstok(src, L"_:", &buf); + ASSERT_TRUE(token[0] == L'a'); + ASSERT_TRUE(token[1] == L'b'); + ASSERT_TRUE(token[2] == L'\0'); + + token = LIBC_NAMESPACE::wcstok(nullptr, L":_", &buf); + ASSERT_TRUE(token[0] == L'c'); + ASSERT_TRUE(token[1] == L'd'); + ASSERT_TRUE(token[2] == L'\0'); + + token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,", &buf); + ASSERT_TRUE(token[0] == L'e'); + ASSERT_TRUE(token[1] == L'f'); + ASSERT_TRUE(token[2] == L'\0'); + + token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &buf); + ASSERT_EQ(token, nullptr); +} From a562d1c5c9ddb8b632b96c780134e05eced6877d Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Fri, 27 Jun 2025 16:27:39 +0000 Subject: [PATCH 4/5] refactored and replaced tests to mimic strtok_r --- libc/src/wchar/wcstok.cpp | 52 +++---- libc/src/wchar/wcstok.h | 2 +- libc/test/src/wchar/wcstok_test.cpp | 204 ++++++++++++++++------------ 3 files changed, 149 insertions(+), 109 deletions(-) diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp index a02332ac1fbbd..b34f6613b732f 100644 --- a/libc/src/wchar/wcstok.cpp +++ b/libc/src/wchar/wcstok.cpp @@ -13,34 +13,38 @@ namespace LIBC_NAMESPACE_DECL { +bool isADelimeter(wchar_t wc, const wchar_t *delimiters) { + for (const wchar_t *delim_ptr = delimiters; *delim_ptr != L'\0'; delim_ptr++) + if (wc == *delim_ptr) + return true; + return false; +} + LLVM_LIBC_FUNCTION(wchar_t *, wcstok, (wchar_t *__restrict str, const wchar_t *__restrict delim, - wchar_t **__restrict ptr)) { - if (str == nullptr) - str = *ptr; - - bool foundTokenStart = false; - wchar_t *out = nullptr; - wchar_t *str_ptr; - for (str_ptr = str; *str_ptr != L'\0'; str_ptr++) { - bool inDelim = false; - for (const wchar_t *delim_ptr = delim; *delim_ptr != L'\0' && !inDelim; - delim_ptr++) - if (*str_ptr == *delim_ptr) - inDelim = true; - - if (!inDelim && !foundTokenStart) { - foundTokenStart = true; - out = str_ptr; - } else if (inDelim && foundTokenStart) { - *str_ptr = L'\0'; - *ptr = str_ptr + 1; - return out; - } + wchar_t **__restrict context)) { + if (str == nullptr) { + if (*context == nullptr) + return nullptr; + + str = *context; } - *ptr = str_ptr; - return out; + wchar_t *tok_start, *tok_end; + for (tok_start = str; *tok_start != L'\0' && isADelimeter(*tok_start, delim); + tok_start++) + ; + + for (tok_end = tok_start; *tok_end != L'\0' && !isADelimeter(*tok_end, delim); + tok_end++) + ; + + if (*tok_end != L'\0') { + *tok_end = L'\0'; + tok_end++; + } + *context = tok_end; + return *tok_start == L'\0' ? nullptr : tok_start; } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/wcstok.h b/libc/src/wchar/wcstok.h index 9f41ea37a947a..5e673ff4e89b9 100644 --- a/libc/src/wchar/wcstok.h +++ b/libc/src/wchar/wcstok.h @@ -15,7 +15,7 @@ namespace LIBC_NAMESPACE_DECL { wchar_t *wcstok(wchar_t *__restrict str, const wchar_t *__restrict delim, - wchar_t **__restrict ptr); + wchar_t **__restrict context); } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/src/wchar/wcstok_test.cpp b/libc/test/src/wchar/wcstok_test.cpp index 79153ccc0adad..02b5b49d55ce3 100644 --- a/libc/test/src/wchar/wcstok_test.cpp +++ b/libc/test/src/wchar/wcstok_test.cpp @@ -11,135 +11,171 @@ #include "src/wchar/wcstok.h" #include "test/UnitTest/Test.h" -TEST(LlvmLibcStrTokTest, NoTokenFound) { - wchar_t empty[] = L""; - wchar_t *buf; - ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &buf), nullptr); - ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &buf), nullptr); - - wchar_t single[] = L"_"; - wchar_t *token = LIBC_NAMESPACE::wcstok(single, L"", &buf); - ASSERT_TRUE(token[0] == L'_'); - ASSERT_TRUE(token[1] == L'\0'); - - wchar_t multiple[] = L"1,2"; - token = LIBC_NAMESPACE::wcstok(multiple, L":", &buf); - ASSERT_TRUE(multiple[0] == L'1'); - ASSERT_TRUE(multiple[1] == L','); - ASSERT_TRUE(multiple[2] == L'2'); - ASSERT_TRUE(multiple[3] == L'\0'); +TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) { + { // Empty source and delimiter string. + wchar_t empty[] = L""; + wchar_t *reserve = nullptr; + ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &reserve), nullptr); + // Another call to ensure that 'reserve' is not in a bad state. + ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &reserve), nullptr); + ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"", &reserve), nullptr); + } + { // Empty source and single character delimiter string. + wchar_t empty[] = L""; + wchar_t *reserve = nullptr; + ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &reserve), nullptr); + // Another call to ensure that 'reserve' is not in a bad state. + ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &reserve), nullptr); + ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr); + } + { // Same wchar_tacter source and delimiter string. + wchar_t single[] = L"_"; + wchar_t *reserve = nullptr; + ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr); + // Another call to ensure that 'reserve' is not in a bad state. + ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr); + ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr); + } + { // Multiple wchar_tacter source and single wchar_tacter delimiter string. + wchar_t multiple[] = L"1,2"; + wchar_t *reserve = nullptr; + wchar_t *tok = LIBC_NAMESPACE::wcstok(multiple, L":", &reserve); + ASSERT_TRUE(tok[0] == L'1'); + ASSERT_TRUE(tok[1] == L','); + ASSERT_TRUE(tok[2] == L'2'); + ASSERT_TRUE(tok[3] == L'\0'); + // Another call to ensure that 'reserve' is not in a bad state. + tok = LIBC_NAMESPACE::wcstok(multiple, L":", &reserve); + ASSERT_TRUE(tok[0] == L'1'); + ASSERT_TRUE(tok[1] == L','); + ASSERT_TRUE(tok[2] == L'2'); + ASSERT_TRUE(tok[3] == L'\0'); + ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L":", &reserve), nullptr); + } } -TEST(LlvmLibcStrTokTest, DelimiterAsFirstCharacterShouldBeIgnored) { - wchar_t *buf; +TEST(LlvmLibcWCSTokReentrantTest, DelimiterAsFirstCharacterShouldBeIgnored) { wchar_t src[] = L".123"; - wchar_t *token = LIBC_NAMESPACE::wcstok(src, L".", &buf); - ASSERT_TRUE(token[0] == L'1'); - ASSERT_TRUE(token[1] == L'2'); - ASSERT_TRUE(token[2] == L'3'); - ASSERT_TRUE(token[3] == L'\0'); + wchar_t *reserve = nullptr; + wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L".", &reserve); + ASSERT_TRUE(tok[0] == L'1'); + ASSERT_TRUE(tok[1] == L'2'); + ASSERT_TRUE(tok[2] == L'3'); + ASSERT_TRUE(tok[3] == L'\0'); + // Another call to ensure that 'reserve' is not in a bad state. + tok = LIBC_NAMESPACE::wcstok(src, L".", &reserve); + ASSERT_TRUE(tok[0] == L'1'); + ASSERT_TRUE(tok[1] == L'2'); + ASSERT_TRUE(tok[2] == L'3'); + ASSERT_TRUE(tok[3] == L'\0'); + ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L".", &reserve), nullptr); } -TEST(LlvmLibcStrTokTest, DelimiterIsMiddleCharacter) { +TEST(LlvmLibcWCSTokReentrantTest, DelimiterIsMiddleCharacter) { wchar_t src[] = L"12,34"; - wchar_t *buf; - wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",", &buf); - ASSERT_TRUE(token[0] == L'1'); - ASSERT_TRUE(token[1] == L'2'); - ASSERT_TRUE(token[2] == L'\0'); + wchar_t *reserve = nullptr; + wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve); + ASSERT_TRUE(tok[0] == L'1'); + ASSERT_TRUE(tok[1] == L'2'); + ASSERT_TRUE(tok[2] == L'\0'); + // Another call to ensure that 'reserve' is not in a bad state. + tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve); + ASSERT_TRUE(tok[0] == L'1'); + ASSERT_TRUE(tok[1] == L'2'); + ASSERT_TRUE(tok[2] == L'\0'); + ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L",", &reserve), nullptr); } -TEST(LlvmLibcStrTokTest, DelimiterAsLastCharacterShouldBeIgnored) { +TEST(LlvmLibcWCSTokReentrantTest, DelimiterAsLastCharacterShouldBeIgnored) { wchar_t src[] = L"1234:"; - wchar_t *buf; - wchar_t *token = LIBC_NAMESPACE::wcstok(src, L":", &buf); - ASSERT_TRUE(token[0] == L'1'); - ASSERT_TRUE(token[1] == L'2'); - ASSERT_TRUE(token[2] == L'3'); - ASSERT_TRUE(token[3] == L'4'); - ASSERT_TRUE(token[4] == L'\0'); + wchar_t *reserve = nullptr; + wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L":", &reserve); + ASSERT_TRUE(tok[0] == L'1'); + ASSERT_TRUE(tok[1] == L'2'); + ASSERT_TRUE(tok[2] == L'3'); + ASSERT_TRUE(tok[3] == L'4'); + ASSERT_TRUE(tok[4] == L'\0'); + // Another call to ensure that 'reserve' is not in a bad state. + tok = LIBC_NAMESPACE::wcstok(src, L":", &reserve); + ASSERT_TRUE(tok[0] == L'1'); + ASSERT_TRUE(tok[1] == L'2'); + ASSERT_TRUE(tok[2] == L'3'); + ASSERT_TRUE(tok[3] == L'4'); + ASSERT_TRUE(tok[4] == L'\0'); + ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L":", &reserve), nullptr); } -TEST(LlvmLibcStrTokTest, MultipleDelimiters) { - wchar_t src[] = L"12,.34"; - wchar_t *buf; - wchar_t *token; - - token = LIBC_NAMESPACE::wcstok(src, L".", &buf); - ASSERT_TRUE(token[0] == L'1'); - ASSERT_TRUE(token[1] == L'2'); - ASSERT_TRUE(token[2] == L','); - ASSERT_TRUE(token[3] == L'\0'); - - token = LIBC_NAMESPACE::wcstok(src, L".,", &buf); - ASSERT_TRUE(token[0] == L'1'); - ASSERT_TRUE(token[1] == L'2'); - ASSERT_TRUE(token[2] == L'\0'); - - token = LIBC_NAMESPACE::wcstok(src, L",.", &buf); - ASSERT_TRUE(token[0] == L'1'); - ASSERT_TRUE(token[1] == L'2'); - ASSERT_TRUE(token[2] == L'\0'); - - token = LIBC_NAMESPACE::wcstok(src, L":,.", &buf); - ASSERT_TRUE(token[0] == L'1'); - ASSERT_TRUE(token[1] == L'2'); - ASSERT_TRUE(token[2] == L'\0'); +TEST(LlvmLibcWCSTokReentrantTest, ShouldNotGoPastNullTerminator) { + wchar_t src[] = {L'1', L'2', L'\0', L',', L'3'}; + wchar_t *reserve = nullptr; + wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve); + ASSERT_TRUE(tok[0] == L'1'); + ASSERT_TRUE(tok[1] == L'2'); + ASSERT_TRUE(tok[2] == L'\0'); + // Another call to ensure that 'reserve' is not in a bad state. + tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve); + ASSERT_TRUE(tok[0] == L'1'); + ASSERT_TRUE(tok[1] == L'2'); + ASSERT_TRUE(tok[2] == L'\0'); + ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L",", &reserve), nullptr); } -TEST(LlvmLibcStrTokTest, ShouldNotGoPastNullTerminator) { - wchar_t src[] = {L'1', L'2', L'\0', L',', L'3'}; - wchar_t *buf; - wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",", &buf); - ASSERT_TRUE(token[0] == L'1'); - ASSERT_TRUE(token[1] == L'2'); - ASSERT_TRUE(token[2] == L'\0'); +TEST(LlvmLibcWCSTokReentrantTest, + ShouldReturnNullptrWhenBothSrcAndSaveptrAreNull) { + wchar_t *src = nullptr; + wchar_t *reserve = nullptr; + // Ensure that instead of crashing if src and reserve are null, nullptr is + // returned + ASSERT_EQ(LIBC_NAMESPACE::wcstok(src, L",", &reserve), nullptr); + // And that neither src nor reserve are changed when that happens + ASSERT_EQ(src, nullptr); + ASSERT_EQ(reserve, nullptr); } -TEST(LlvmLibcStrTokTest, SubsequentCallsShouldFindFollowingDelimiters) { +TEST(LlvmLibcWCSTokReentrantTest, + SubsequentCallsShouldFindFollowingDelimiters) { wchar_t src[] = L"12,34.56"; - wchar_t *buf; - wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",.", &buf); + wchar_t *reserve = nullptr; + wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",.", &reserve); ASSERT_TRUE(token[0] == L'1'); ASSERT_TRUE(token[1] == L'2'); ASSERT_TRUE(token[2] == L'\0'); - token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &buf); + token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &reserve); ASSERT_TRUE(token[0] == L'3'); ASSERT_TRUE(token[1] == L'4'); ASSERT_TRUE(token[2] == L'\0'); - token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &buf); + token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &reserve); ASSERT_TRUE(token[0] == L'5'); ASSERT_TRUE(token[1] == L'6'); ASSERT_TRUE(token[2] == L'\0'); - - token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &buf); + token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve); ASSERT_EQ(token, nullptr); // Subsequent calls after hitting the end of the string should also return // nullptr. - token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &buf); + token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve); ASSERT_EQ(token, nullptr); } -TEST(LlvmLibcStrTokTest, DelimitersShouldNotBeIncludedInToken) { - wchar_t *buf; +TEST(LlvmLibcWCSTokReentrantTest, DelimitersShouldNotBeIncludedInToken) { wchar_t src[] = L"__ab__:_cd__:__ef__:__"; - wchar_t *token = LIBC_NAMESPACE::wcstok(src, L"_:", &buf); + wchar_t *reserve = nullptr; + wchar_t *token = LIBC_NAMESPACE::wcstok(src, L"_:", &reserve); ASSERT_TRUE(token[0] == L'a'); ASSERT_TRUE(token[1] == L'b'); ASSERT_TRUE(token[2] == L'\0'); - token = LIBC_NAMESPACE::wcstok(nullptr, L":_", &buf); + token = LIBC_NAMESPACE::wcstok(nullptr, L":_", &reserve); ASSERT_TRUE(token[0] == L'c'); ASSERT_TRUE(token[1] == L'd'); ASSERT_TRUE(token[2] == L'\0'); - token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,", &buf); + token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,", &reserve); ASSERT_TRUE(token[0] == L'e'); ASSERT_TRUE(token[1] == L'f'); ASSERT_TRUE(token[2] == L'\0'); - token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &buf); + token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve); ASSERT_EQ(token, nullptr); } From 15f0166593dbe7193af95e675b7c7950285cb2aa Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Mon, 30 Jun 2025 17:06:18 +0000 Subject: [PATCH 5/5] formatting --- libc/src/wchar/wcstok.cpp | 8 ++++---- libc/test/src/wchar/wcstok_test.cpp | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp index b34f6613b732f..291efc15e158a 100644 --- a/libc/src/wchar/wcstok.cpp +++ b/libc/src/wchar/wcstok.cpp @@ -14,7 +14,7 @@ namespace LIBC_NAMESPACE_DECL { bool isADelimeter(wchar_t wc, const wchar_t *delimiters) { - for (const wchar_t *delim_ptr = delimiters; *delim_ptr != L'\0'; delim_ptr++) + for (const wchar_t *delim_ptr = delimiters; *delim_ptr != L'\0'; ++delim_ptr) if (wc == *delim_ptr) return true; return false; @@ -32,16 +32,16 @@ LLVM_LIBC_FUNCTION(wchar_t *, wcstok, wchar_t *tok_start, *tok_end; for (tok_start = str; *tok_start != L'\0' && isADelimeter(*tok_start, delim); - tok_start++) + ++tok_start) ; for (tok_end = tok_start; *tok_end != L'\0' && !isADelimeter(*tok_end, delim); - tok_end++) + ++tok_end) ; if (*tok_end != L'\0') { *tok_end = L'\0'; - tok_end++; + ++tok_end; } *context = tok_end; return *tok_start == L'\0' ? nullptr : tok_start; diff --git a/libc/test/src/wchar/wcstok_test.cpp b/libc/test/src/wchar/wcstok_test.cpp index 02b5b49d55ce3..7106e9f2fab5e 100644 --- a/libc/test/src/wchar/wcstok_test.cpp +++ b/libc/test/src/wchar/wcstok_test.cpp @@ -28,7 +28,7 @@ TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) { ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &reserve), nullptr); ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr); } - { // Same wchar_tacter source and delimiter string. + { // Same character source and delimiter string. wchar_t single[] = L"_"; wchar_t *reserve = nullptr; ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr); @@ -36,7 +36,7 @@ TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) { ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr); ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr); } - { // Multiple wchar_tacter source and single wchar_tacter delimiter string. + { // Multiple character source and single character delimiter string. wchar_t multiple[] = L"1,2"; wchar_t *reserve = nullptr; wchar_t *tok = LIBC_NAMESPACE::wcstok(multiple, L":", &reserve);