Skip to content

Commit 97d49b8

Browse files
authored
Merge pull request #62931 from lorentey/character-recognizer-conformances-5.8
[5.8][stdlib] _CharacterRecognizer: Add Sendable, Equatable, CustomStringConvertible conformances
2 parents 24fe6ae + 62702f4 commit 97d49b8

File tree

2 files changed

+62
-2
lines changed

2 files changed

+62
-2
lines changed

stdlib/public/core/StringGraphemeBreaking.swift

Lines changed: 29 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -407,7 +407,7 @@ extension Unicode.Scalar {
407407
}
408408
}
409409

410-
internal struct _GraphemeBreakingState {
410+
internal struct _GraphemeBreakingState: Sendable, Equatable {
411411
// When we're looking through an indic sequence, one of the requirements is
412412
// that there is at LEAST 1 Virama present between two linking consonants.
413413
// This value helps ensure that when we ultimately need to decide whether or
@@ -436,6 +436,18 @@ internal struct _GraphemeBreakingState {
436436
var shouldBreakRI = false
437437
}
438438

439+
extension _GraphemeBreakingState: CustomStringConvertible {
  // A compact rendering of the state flags: one letter for every flag that
  // is currently set, always in the order V (hasSeenVirama),
  // E (isInEmojiSequence), I (isInIndicSequence), R (shouldBreakRI), wrapped
  // in square brackets. With no flag set, the result is "[]".
  var description: String {
    let virama = hasSeenVirama ? "V" : ""
    let emoji = isInEmojiSequence ? "E" : ""
    let indic = isInIndicSequence ? "I" : ""
    let regional = shouldBreakRI ? "R" : ""
    return "[\(virama)\(emoji)\(indic)\(regional)]"
  }
}
450+
439451
extension Unicode {
440452
/// A state machine for recognizing character (i.e., extended grapheme
441453
/// cluster) boundaries in an arbitrary series of Unicode scalars.
@@ -448,7 +460,7 @@ extension Unicode {
448460
/// `String` splits its contents into `Character` values.
449461
@available(SwiftStdlib 5.8, *)
450462
public // SPI(Foundation) FIXME: We need API for this
451-
struct _CharacterRecognizer {
463+
struct _CharacterRecognizer: Sendable {
452464
internal var _previous: Unicode.Scalar
453465
internal var _state: _GraphemeBreakingState
454466

@@ -547,6 +559,21 @@ extension Unicode {
547559
}
548560
}
549561

562+
@available(SwiftStdlib 5.8, *)
extension Unicode._CharacterRecognizer: Equatable {
  /// Returns a Boolean value indicating whether two recognizers hold the
  /// same previous scalar and the same grapheme breaking state.
  public static func ==(left: Self, right: Self) -> Bool {
    // Compare the previous scalar first; the breaking state is only
    // consulted when the scalars match.
    guard left._previous == right._previous else { return false }
    return left._state == right._state
  }
}
568+
569+
@available(SwiftStdlib 5.8, *)
extension Unicode._CharacterRecognizer: CustomStringConvertible {
  /// A textual representation of the recognizer: the description of its
  /// breaking state immediately followed by `U+` and the value of the
  /// previous scalar in uppercase hexadecimal, without zero padding.
  public var description: String {
    let hexValue = String(_previous.value, radix: 16, uppercase: true)
    return "\(_state)U+\(hexValue)"
  }
}
575+
576+
550577
extension _StringGuts {
551578
// Returns the stride of the grapheme cluster starting at offset `index`,
552579
// assuming it is on a grapheme cluster boundary.

test/stdlib/CharacterRecognizer.swift

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -114,3 +114,36 @@ if #available(SwiftStdlib 5.8, *) {
114114
""")
115115
}
116116
}
117+
118+
if #available(SwiftStdlib 5.8, *) {
  // Checks `==` on two recognizers as scalars are fed to one side, then the
  // other, so that they repeatedly fall out of and back into step.
  suite.test("Equatable") {
    var first = Unicode._CharacterRecognizer()
    var second = Unicode._CharacterRecognizer()

    // Two freshly created recognizers are expected to compare equal.
    expectEqual(first, second)

    // Feeding "a" to only one side is expected to make them differ; feeding
    // it to the other side as well is expected to restore equality.
    expectTrue(first.hasBreak(before: "a"))
    expectNotEqual(first, second)
    expectTrue(second.hasBreak(before: "a"))
    expectEqual(first, second)

    // From here on the two sides are fed different scalars.
    expectTrue(second.hasBreak(before: "\u{1f44f}")) // CLAPPING HANDS SIGN
    expectNotEqual(first, second)
    expectTrue(first.hasBreak(before: "b"))
    expectNotEqual(first, second)
    expectFalse(second.hasBreak(before: "\u{1f3fc}")) // EMOJI MODIFIER FITZPATRICK TYPE-3
    expectNotEqual(first, second)

    // Once both sides have last seen "b" after a break, they are expected to
    // be equal again.
    expectTrue(second.hasBreak(before: "b"))
    expectEqual(first, second) // breaks should reset state
  }
}
137+
138+
if #available(SwiftStdlib 5.8, *) {
  // Checks the string rendering of a recognizer after each scalar it is fed:
  // bracketed state flags followed by `U+` and the last scalar in hex.
  suite.test("CustomStringConvertible") {
    var recognizer = Unicode._CharacterRecognizer()

    // Initial state: no flags, previous scalar rendered as 0.
    expectEqual("\(recognizer)", "[]U+0")

    // The first regional indicator is expected to leave the flags empty.
    expectTrue(recognizer.hasBreak(before: "\u{1F1FA}")) // REGIONAL INDICATOR SYMBOL LETTER U
    expectEqual("\(recognizer)", "[]U+1F1FA")

    // The second regional indicator does not break and is expected to show
    // the "R" flag.
    expectFalse(recognizer.hasBreak(before: "\u{1F1F8}")) // REGIONAL INDICATOR SYMBOL LETTER S
    expectEqual("\(recognizer)", "[R]U+1F1F8")

    // After a break before "$" the flags are expected to be empty again.
    expectTrue(recognizer.hasBreak(before: "$"))
    expectEqual("\(recognizer)", "[]U+24")
  }
}

0 commit comments

Comments
 (0)