From a7c0897a398daa9093c5e246cad69f8ebd1a413f Mon Sep 17 00:00:00 2001 From: Pazzaz Date: Wed, 4 Jul 2018 20:35:33 +0200 Subject: [PATCH 1/3] Only convert `char`s that need it in str case conversion methods. --- src/liballoc/str.rs | 56 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/src/liballoc/str.rs b/src/liballoc/str.rs index bb99d0401d3cd..8f79af9a35367 100644 --- a/src/liballoc/str.rs +++ b/src/liballoc/str.rs @@ -360,18 +360,35 @@ impl str { #[stable(feature = "unicode_case_mapping", since = "1.2.0")] pub fn to_lowercase(&self) -> String { let mut s = String::with_capacity(self.len()); + let mut lower = 0; for (i, c) in self[..].char_indices() { - if c == 'Σ' { - // Σ maps to σ, except at the end of a word where it maps to ς. - // This is the only conditional (contextual) but language-independent mapping - // in `SpecialCasing.txt`, - // so hard-code it rather than have a generic "condition" mechanism. - // See https://github.com/rust-lang/rust/issues/26035 - map_uppercase_sigma(self, i, &mut s) - } else { - s.extend(c.to_lowercase()); + // Lowercase `char`s are inserted into `s` in slices, + // uppercase `char`s are converted to lowercase and inserted individually. + if c.is_uppercase() { + if lower != i { + unsafe { + // lower..i is an interval of lowercase `char`s before `c`. + s.push_str((lower..i).get_unchecked(self)); + } + } + if c == 'Σ' { + // Σ maps to σ, except at the end of a word where it maps to ς. + // This is the only conditional (contextual) but language-independent mapping + // in `SpecialCasing.txt`, + // so hard-code it rather than have a generic "condition" mechanism. + // See https://github.com/rust-lang/rust/issues/26035 + map_uppercase_sigma(self, i, &mut s); + } else { + s.extend(c.to_lowercase()); + } + + // The next possible interval of lowercase `char`s start after `c`. + lower = i + c.len_utf8(); } } + unsafe { + s.push_str((lower..self.len()).get_unchecked(self)); + } return s; fn map_uppercase_sigma(from: &str, i: usize, to: &mut String) { @@ -423,7 +440,26 @@ impl str { #[stable(feature = "unicode_case_mapping", since = "1.2.0")] pub fn to_uppercase(&self) -> String { let mut s = String::with_capacity(self.len()); - s.extend(self.chars().flat_map(|c| c.to_uppercase())); + let mut lower = 0; + for (i, c) in self[..].char_indices() { + // Uppercase `char`s are inserted into `s` in slices, + // lowercase `char`s are converted to uppercase and inserted individually. + if c.is_lowercase() { + if lower != i { + unsafe { + // lower..i is an interval of uppercase `char`s before `c`. + s.push_str((lower..i).get_unchecked(self)); + } + } + s.extend(c.to_uppercase()); + + // The next possible interval of uppercase `char`s start after `c`. + lower = i + c.len_utf8(); + } + } + unsafe { + s.push_str((lower..self.len()).get_unchecked(self)); + } return s; } From 3e49134027dcc436ecaf978198b1f15bef9996af Mon Sep 17 00:00:00 2001 From: Pazzaz Date: Wed, 4 Jul 2018 22:14:12 +0200 Subject: [PATCH 2/3] Enable feature slice_index_methods in liballoc --- src/liballoc/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/liballoc/lib.rs b/src/liballoc/lib.rs index 493448eaf88fa..2d3a9569f49ba 100644 --- a/src/liballoc/lib.rs +++ b/src/liballoc/lib.rs @@ -106,6 +106,7 @@ #![feature(ptr_internals)] #![feature(ptr_offset_from)] #![feature(rustc_attrs)] +#![feature(slice_index_methods)] #![feature(specialization)] #![feature(split_ascii_whitespace)] #![feature(staged_api)] From fde8e5935dbed480e8f1f6469f7ba780c163c492 Mon Sep 17 00:00:00 2001 From: Pazzaz Date: Wed, 4 Jul 2018 22:29:13 +0200 Subject: [PATCH 3/3] Trim trailing whitespace --- src/liballoc/str.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/liballoc/str.rs b/src/liballoc/str.rs index 8f79af9a35367..9ac7fd0dcda2d 100644 --- a/src/liballoc/str.rs +++ b/src/liballoc/str.rs @@ -381,7 +381,7 @@ impl str { } else { s.extend(c.to_lowercase()); } - + // The next possible interval of lowercase `char`s start after `c`. lower = i + c.len_utf8(); }