Skip to content

[libc] wcstok implementation #145989

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libc/config/linux/x86_64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.wchar.wmemchr
libc.src.wchar.wcpcpy
libc.src.wchar.wcpncpy
libc.src.wchar.wcstok

# sys/uio.h entrypoints
libc.src.sys.uio.writev
Expand Down
8 changes: 8 additions & 0 deletions libc/include/wchar.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,14 @@ functions:
arguments:
- type: wchar_t *__restrict
- type: const wchar_t *__restrict
- name: wcstok
standards:
- stdc
return_type: wchar_t *
arguments:
- type: wchar_t *__restrict
- type: const wchar_t *__restrict
- type: wchar_t **__restrict
- name: wcpcpy
standards:
- stdc
Expand Down
10 changes: 10 additions & 0 deletions libc/src/wchar/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,16 @@ add_entrypoint_object(
libc.src.__support.wctype_utils
)

# Entrypoint object for wcstok, built from wcstok.cpp with wcstok.h as its
# header; depends on the wchar_t type header target.
add_entrypoint_object(
wcstok
SRCS
wcstok.cpp
HDRS
wcstok.h
DEPENDS
libc.hdr.types.wchar_t
)

add_entrypoint_object(
wcrtomb
SRCS
Expand Down
50 changes: 50 additions & 0 deletions libc/src/wchar/wcstok.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
//===-- Implementation of wcstok ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/wchar/wcstok.h"

#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"

namespace LIBC_NAMESPACE_DECL {

// Returns true if `wc` equals any wide character in the null-terminated
// `delimiters` string. The terminator itself is never compared, so an empty
// `delimiters` string (or wc == L'\0') always yields false.
//
// Declared static because this helper is not declared in wcstok.h and should
// not be given external linkage.
static bool isADelimeter(wchar_t wc, const wchar_t *delimiters) {
  for (const wchar_t *delim_ptr = delimiters; *delim_ptr != L'\0'; ++delim_ptr)
    if (wc == *delim_ptr)
      return true;
  return false;
}

// Splits a wide string into tokens separated by any of the wide characters in
// `delim`, modifying the string in place.
//
// str:     string to tokenize; pass nullptr to continue from the position
//          stored in `*context`.
// delim:   null-terminated set of delimiter characters.
// context: in/out scan position. It is written on every call except the early
//          return taken when both `str` and `*context` are null.
//
// Returns a pointer to the next null-terminated token, or nullptr when no
// token remains.
LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
                   (wchar_t *__restrict str, const wchar_t *__restrict delim,
                    wchar_t **__restrict context)) {
  if (str == nullptr) {
    if (*context == nullptr)
      return nullptr;

    str = *context;
  }

  // Skip leading delimiters to find where the token begins.
  wchar_t *tok_start = str;
  while (*tok_start != L'\0' && isADelimeter(*tok_start, delim))
    ++tok_start;

  // Advance to the first delimiter (or the terminator) following the token.
  wchar_t *tok_end = tok_start;
  while (*tok_end != L'\0' && !isADelimeter(*tok_end, delim))
    ++tok_end;

  // Terminate the token in place and resume just past the overwritten
  // delimiter. If the scan stopped on the terminator, the saved position stays
  // on it, so later continuation calls return nullptr.
  if (*tok_end != L'\0') {
    *tok_end = L'\0';
    ++tok_end;
  }
  *context = tok_end;
  return *tok_start == L'\0' ? nullptr : tok_start;
}

} // namespace LIBC_NAMESPACE_DECL
22 changes: 22 additions & 0 deletions libc/src/wchar/wcstok.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//===-- Implementation header for wcstok ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_WCHAR_WCSTOK_H
#define LLVM_LIBC_SRC_WCHAR_WCSTOK_H

#include "hdr/types/wchar_t.h"
#include "src/__support/macros/config.h"

namespace LIBC_NAMESPACE_DECL {

// Tokenizes the wide string `str` using the delimiter characters in `delim`.
// `context` holds the scan position between calls; pass a null `str` to
// continue from the position saved there. Returns the next token, or nullptr
// when there is none.
wchar_t *wcstok(wchar_t *__restrict str, const wchar_t *__restrict delim,
wchar_t **__restrict context);

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_WCHAR_WCSTOK_H
10 changes: 10 additions & 0 deletions libc/test/src/wchar/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,16 @@ add_libc_test(
libc.src.wchar.wcschr
)

# Unit tests for wcstok (wcstok_test.cpp), registered in the
# libc_wchar_unittests suite.
add_libc_test(
wcstok_test
SUITE
libc_wchar_unittests
SRCS
wcstok_test.cpp
DEPENDS
libc.src.wchar.wcstok
)

add_libc_test(
wcsncmp_test
SUITE
Expand Down
181 changes: 181 additions & 0 deletions libc/test/src/wchar/wcstok_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
//===-- Unittests for wcstok ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/wchar/wcstok.h"
#include "test/UnitTest/Test.h"

// Inputs that yield no token at all, plus one input whose delimiter never
// occurs so the whole source comes back as a single token.
TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) {
  { // Empty source and delimiter string.
    wchar_t empty[] = L"";
    wchar_t *reserve = nullptr;
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &reserve), nullptr);
    // Another call to ensure that 'reserve' is not in a bad state.
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &reserve), nullptr);
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"", &reserve), nullptr);
  }
  { // Empty source and single character delimiter string.
    wchar_t empty[] = L"";
    wchar_t *reserve = nullptr;
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &reserve), nullptr);
    // Another call to ensure that 'reserve' is not in a bad state.
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &reserve), nullptr);
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
  }
  { // Same character source and delimiter string.
    wchar_t single[] = L"_";
    wchar_t *reserve = nullptr;
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr);
    // Another call to ensure that 'reserve' is not in a bad state.
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr);
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
  }
  { // Multiple character source and single character delimiter string.
    wchar_t multiple[] = L"1,2";
    wchar_t *reserve = nullptr;
    wchar_t *tok = LIBC_NAMESPACE::wcstok(multiple, L":", &reserve);
    // Guard the dereferences below so a regression fails instead of crashing.
    ASSERT_TRUE(tok != nullptr);
    ASSERT_TRUE(tok[0] == L'1');
    ASSERT_TRUE(tok[1] == L',');
    ASSERT_TRUE(tok[2] == L'2');
    ASSERT_TRUE(tok[3] == L'\0');
    // Another call to ensure that 'reserve' is not in a bad state.
    tok = LIBC_NAMESPACE::wcstok(multiple, L":", &reserve);
    ASSERT_TRUE(tok != nullptr);
    ASSERT_TRUE(tok[0] == L'1');
    ASSERT_TRUE(tok[1] == L',');
    ASSERT_TRUE(tok[2] == L'2');
    ASSERT_TRUE(tok[3] == L'\0');
    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L":", &reserve), nullptr);
  }
}

// A delimiter at the very start of the source is skipped rather than
// producing an empty token.
TEST(LlvmLibcWCSTokReentrantTest, DelimiterAsFirstCharacterShouldBeIgnored) {
  wchar_t src[] = L".123";
  wchar_t *reserve = nullptr;
  wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L".", &reserve);
  // Guard the dereferences below so a regression fails instead of crashing.
  ASSERT_TRUE(tok != nullptr);
  ASSERT_TRUE(tok[0] == L'1');
  ASSERT_TRUE(tok[1] == L'2');
  ASSERT_TRUE(tok[2] == L'3');
  ASSERT_TRUE(tok[3] == L'\0');
  // Another call to ensure that 'reserve' is not in a bad state.
  tok = LIBC_NAMESPACE::wcstok(src, L".", &reserve);
  ASSERT_TRUE(tok != nullptr);
  ASSERT_TRUE(tok[0] == L'1');
  ASSERT_TRUE(tok[1] == L'2');
  ASSERT_TRUE(tok[2] == L'3');
  ASSERT_TRUE(tok[3] == L'\0');
  ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L".", &reserve), nullptr);
}

// A delimiter in the middle of the source ends the first token. Restarting
// from `src` afterwards sees only that first token, because the delimiter was
// overwritten with a terminator.
TEST(LlvmLibcWCSTokReentrantTest, DelimiterIsMiddleCharacter) {
  wchar_t src[] = L"12,34";
  wchar_t *reserve = nullptr;
  wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
  // Guard the dereferences below so a regression fails instead of crashing.
  ASSERT_TRUE(tok != nullptr);
  ASSERT_TRUE(tok[0] == L'1');
  ASSERT_TRUE(tok[1] == L'2');
  ASSERT_TRUE(tok[2] == L'\0');
  // Another call to ensure that 'reserve' is not in a bad state.
  tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
  ASSERT_TRUE(tok != nullptr);
  ASSERT_TRUE(tok[0] == L'1');
  ASSERT_TRUE(tok[1] == L'2');
  ASSERT_TRUE(tok[2] == L'\0');
  ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L",", &reserve), nullptr);
}

// A trailing delimiter terminates the token and does not produce an extra
// empty token afterwards.
TEST(LlvmLibcWCSTokReentrantTest, DelimiterAsLastCharacterShouldBeIgnored) {
  wchar_t src[] = L"1234:";
  wchar_t *reserve = nullptr;
  wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L":", &reserve);
  // Guard the dereferences below so a regression fails instead of crashing.
  ASSERT_TRUE(tok != nullptr);
  ASSERT_TRUE(tok[0] == L'1');
  ASSERT_TRUE(tok[1] == L'2');
  ASSERT_TRUE(tok[2] == L'3');
  ASSERT_TRUE(tok[3] == L'4');
  ASSERT_TRUE(tok[4] == L'\0');
  // Another call to ensure that 'reserve' is not in a bad state.
  tok = LIBC_NAMESPACE::wcstok(src, L":", &reserve);
  ASSERT_TRUE(tok != nullptr);
  ASSERT_TRUE(tok[0] == L'1');
  ASSERT_TRUE(tok[1] == L'2');
  ASSERT_TRUE(tok[2] == L'3');
  ASSERT_TRUE(tok[3] == L'4');
  ASSERT_TRUE(tok[4] == L'\0');
  ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L":", &reserve), nullptr);
}

// Characters stored after the embedded terminator must never be scanned or
// returned as part of a token.
TEST(LlvmLibcWCSTokReentrantTest, ShouldNotGoPastNullTerminator) {
  wchar_t src[] = {L'1', L'2', L'\0', L',', L'3'};
  wchar_t *reserve = nullptr;
  wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
  // Guard the dereferences below so a regression fails instead of crashing.
  ASSERT_TRUE(tok != nullptr);
  ASSERT_TRUE(tok[0] == L'1');
  ASSERT_TRUE(tok[1] == L'2');
  ASSERT_TRUE(tok[2] == L'\0');
  // Another call to ensure that 'reserve' is not in a bad state.
  tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
  ASSERT_TRUE(tok != nullptr);
  ASSERT_TRUE(tok[0] == L'1');
  ASSERT_TRUE(tok[1] == L'2');
  ASSERT_TRUE(tok[2] == L'\0');
  ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L",", &reserve), nullptr);
}

// With a null source and a null saved position there is nothing to scan:
// wcstok must return nullptr without crashing and leave both pointers null.
TEST(LlvmLibcWCSTokReentrantTest,
     ShouldReturnNullptrWhenBothSrcAndSaveptrAreNull) {
  wchar_t *null_src = nullptr;
  wchar_t *saveptr = nullptr;

  // The call itself must be safe and report that no token exists.
  wchar_t *const result = LIBC_NAMESPACE::wcstok(null_src, L",", &saveptr);
  ASSERT_EQ(result, nullptr);

  // Neither the source pointer nor the saved position may have been modified.
  ASSERT_EQ(null_src, nullptr);
  ASSERT_EQ(saveptr, nullptr);
}

// Continuation calls (null source) walk through successive tokens, and keep
// returning nullptr once the end of the string has been reached.
TEST(LlvmLibcWCSTokReentrantTest,
     SubsequentCallsShouldFindFollowingDelimiters) {
  wchar_t src[] = L"12,34.56";
  wchar_t *reserve = nullptr;
  wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",.", &reserve);
  // Guard the dereferences below so a regression fails instead of crashing.
  ASSERT_TRUE(token != nullptr);
  ASSERT_TRUE(token[0] == L'1');
  ASSERT_TRUE(token[1] == L'2');
  ASSERT_TRUE(token[2] == L'\0');

  token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &reserve);
  ASSERT_TRUE(token != nullptr);
  ASSERT_TRUE(token[0] == L'3');
  ASSERT_TRUE(token[1] == L'4');
  ASSERT_TRUE(token[2] == L'\0');

  token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &reserve);
  ASSERT_TRUE(token != nullptr);
  ASSERT_TRUE(token[0] == L'5');
  ASSERT_TRUE(token[1] == L'6');
  ASSERT_TRUE(token[2] == L'\0');
  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve);
  ASSERT_EQ(token, nullptr);
  // Subsequent calls after hitting the end of the string should also return
  // nullptr.
  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve);
  ASSERT_EQ(token, nullptr);
}

// Runs of mixed delimiters around each token are stripped, even when the
// delimiter set changes between calls.
TEST(LlvmLibcWCSTokReentrantTest, DelimitersShouldNotBeIncludedInToken) {
  wchar_t src[] = L"__ab__:_cd__:__ef__:__";
  wchar_t *reserve = nullptr;
  wchar_t *token = LIBC_NAMESPACE::wcstok(src, L"_:", &reserve);
  // Guard the dereferences below so a regression fails instead of crashing.
  ASSERT_TRUE(token != nullptr);
  ASSERT_TRUE(token[0] == L'a');
  ASSERT_TRUE(token[1] == L'b');
  ASSERT_TRUE(token[2] == L'\0');

  token = LIBC_NAMESPACE::wcstok(nullptr, L":_", &reserve);
  ASSERT_TRUE(token != nullptr);
  ASSERT_TRUE(token[0] == L'c');
  ASSERT_TRUE(token[1] == L'd');
  ASSERT_TRUE(token[2] == L'\0');

  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,", &reserve);
  ASSERT_TRUE(token != nullptr);
  ASSERT_TRUE(token[0] == L'e');
  ASSERT_TRUE(token[1] == L'f');
  ASSERT_TRUE(token[2] == L'\0');

  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve);
  ASSERT_EQ(token, nullptr);
}
Loading