Skip to content

Commit 9a927a2

Browse files
authored
Rollup merge of #144134 - hkBst:cleanup-unicode-table-gen, r=Mark-Simulacrum
Cleanup unicode table gen: fixing clippy warnings and moving to edition 2024.
2 parents d24684e + b0073d9 commit 9a927a2

File tree

5 files changed

+38
-45
lines changed

5 files changed

+38
-45
lines changed

src/tools/unicode-table-generator/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
[package]
22
name = "unicode-table-generator"
33
version = "0.1.0"
4-
edition = "2021"
4+
edition = "2024"
55

66
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
77

src/tools/unicode-table-generator/src/cascading_map.rs

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ impl RawEmitter {
2121

2222
let points = ranges
2323
.iter()
24-
.flat_map(|r| (r.start..r.end).into_iter().collect::<Vec<u32>>())
24+
.flat_map(|r| (r.start..r.end).collect::<Vec<u32>>())
2525
.collect::<Vec<u32>>();
2626

2727
println!("there are {} points", points.len());
@@ -32,30 +32,28 @@ impl RawEmitter {
3232
// assert that there is no whitespace over the 0x3000 range.
3333
assert!(point <= 0x3000, "the highest unicode whitespace value has changed");
3434
let high_bytes = point as usize >> 8;
35-
let codepoints = codepoints_by_high_bytes.entry(high_bytes).or_insert_with(Vec::new);
35+
let codepoints = codepoints_by_high_bytes.entry(high_bytes).or_default();
3636
codepoints.push(point);
3737
}
3838

3939
let mut bit_for_high_byte = 1u8;
4040
let mut arms = Vec::<String>::new();
4141

42-
let mut high_bytes: Vec<usize> =
43-
codepoints_by_high_bytes.keys().map(|k| k.clone()).collect();
42+
let mut high_bytes: Vec<usize> = codepoints_by_high_bytes.keys().copied().collect();
4443
high_bytes.sort();
4544
for high_byte in high_bytes {
4645
let codepoints = codepoints_by_high_bytes.get_mut(&high_byte).unwrap();
4746
if codepoints.len() == 1 {
4847
let ch = codepoints.pop().unwrap();
49-
arms.push(format!("{} => c as u32 == {:#04x}", high_byte, ch));
48+
arms.push(format!("{high_byte} => c as u32 == {ch:#04x}"));
5049
continue;
5150
}
5251
// more than 1 codepoint in this arm
5352
for codepoint in codepoints {
5453
map[(*codepoint & 0xff) as usize] |= bit_for_high_byte;
5554
}
5655
arms.push(format!(
57-
"{} => WHITESPACE_MAP[c as usize & 0xff] & {} != 0",
58-
high_byte, bit_for_high_byte
56+
"{high_byte} => WHITESPACE_MAP[c as usize & 0xff] & {bit_for_high_byte} != 0"
5957
));
6058
bit_for_high_byte <<= 1;
6159
}
@@ -68,7 +66,7 @@ impl RawEmitter {
6866
writeln!(&mut self.file, "pub const fn lookup(c: char) -> bool {{").unwrap();
6967
writeln!(&mut self.file, " match c as u32 >> 8 {{").unwrap();
7068
for arm in arms {
71-
writeln!(&mut self.file, " {},", arm).unwrap();
69+
writeln!(&mut self.file, " {arm},").unwrap();
7270
}
7371
writeln!(&mut self.file, " _ => false,").unwrap();
7472
writeln!(&mut self.file, " }}").unwrap();

src/tools/unicode-table-generator/src/case_mapping.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ const INDEX_MASK: u32 = 1 << 22;
99
pub(crate) fn generate_case_mapping(data: &UnicodeData) -> String {
1010
let mut file = String::new();
1111

12-
write!(file, "const INDEX_MASK: u32 = 0x{:x};", INDEX_MASK).unwrap();
12+
write!(file, "const INDEX_MASK: u32 = 0x{INDEX_MASK:x};").unwrap();
1313
file.push_str("\n\n");
1414
file.push_str(HEADER.trim_start());
1515
file.push('\n');

src/tools/unicode-table-generator/src/main.rs

Lines changed: 20 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -160,15 +160,15 @@ fn load_data() -> UnicodeData {
160160
.push(Codepoints::Single(row.codepoint));
161161
}
162162

163-
if let Some(mapped) = row.simple_lowercase_mapping {
164-
if mapped != row.codepoint {
165-
to_lower.insert(row.codepoint.value(), (mapped.value(), 0, 0));
166-
}
163+
if let Some(mapped) = row.simple_lowercase_mapping
164+
&& mapped != row.codepoint
165+
{
166+
to_lower.insert(row.codepoint.value(), (mapped.value(), 0, 0));
167167
}
168-
if let Some(mapped) = row.simple_uppercase_mapping {
169-
if mapped != row.codepoint {
170-
to_upper.insert(row.codepoint.value(), (mapped.value(), 0, 0));
171-
}
168+
if let Some(mapped) = row.simple_uppercase_mapping
169+
&& mapped != row.codepoint
170+
{
171+
to_upper.insert(row.codepoint.value(), (mapped.value(), 0, 0));
172172
}
173173
}
174174

@@ -196,12 +196,12 @@ fn load_data() -> UnicodeData {
196196
.flat_map(|codepoints| match codepoints {
197197
Codepoints::Single(c) => c
198198
.scalar()
199-
.map(|ch| (ch as u32..ch as u32 + 1))
199+
.map(|ch| ch as u32..ch as u32 + 1)
200200
.into_iter()
201201
.collect::<Vec<_>>(),
202202
Codepoints::Range(c) => c
203203
.into_iter()
204-
.flat_map(|c| c.scalar().map(|ch| (ch as u32..ch as u32 + 1)))
204+
.flat_map(|c| c.scalar().map(|ch| ch as u32..ch as u32 + 1))
205205
.collect::<Vec<_>>(),
206206
})
207207
.collect::<Vec<Range<u32>>>(),
@@ -236,7 +236,7 @@ fn main() {
236236
let ranges_by_property = &unicode_data.ranges;
237237

238238
if let Some(path) = test_path {
239-
std::fs::write(&path, generate_tests(&write_location, &ranges_by_property)).unwrap();
239+
std::fs::write(&path, generate_tests(&write_location, ranges_by_property)).unwrap();
240240
}
241241

242242
let mut total_bytes = 0;
@@ -246,9 +246,9 @@ fn main() {
246246

247247
let mut emitter = RawEmitter::new();
248248
if property == &"White_Space" {
249-
emit_whitespace(&mut emitter, &ranges);
249+
emit_whitespace(&mut emitter, ranges);
250250
} else {
251-
emit_codepoints(&mut emitter, &ranges);
251+
emit_codepoints(&mut emitter, ranges);
252252
}
253253

254254
modules.push((property.to_lowercase().to_string(), emitter.file));
@@ -288,7 +288,7 @@ fn main() {
288288
for line in contents.lines() {
289289
if !line.trim().is_empty() {
290290
table_file.push_str(" ");
291-
table_file.push_str(&line);
291+
table_file.push_str(line);
292292
}
293293
table_file.push('\n');
294294
}
@@ -312,15 +312,15 @@ fn version() -> String {
312312
let start = readme.find(prefix).unwrap() + prefix.len();
313313
let end = readme.find(" of the Unicode Standard.").unwrap();
314314
let version =
315-
readme[start..end].split('.').map(|v| v.parse::<u32>().expect(&v)).collect::<Vec<_>>();
315+
readme[start..end].split('.').map(|v| v.parse::<u32>().expect(v)).collect::<Vec<_>>();
316316
let [major, minor, micro] = [version[0], version[1], version[2]];
317317

318318
out.push_str(&format!("({major}, {minor}, {micro});\n"));
319319
out
320320
}
321321

322322
fn fmt_list<V: std::fmt::Debug>(values: impl IntoIterator<Item = V>) -> String {
323-
let pieces = values.into_iter().map(|b| format!("{:?}, ", b)).collect::<Vec<_>>();
323+
let pieces = values.into_iter().map(|b| format!("{b:?}, ")).collect::<Vec<_>>();
324324
let mut out = String::new();
325325
let mut line = String::from("\n ");
326326
for piece in pieces {
@@ -348,7 +348,7 @@ fn generate_tests(data_path: &str, ranges: &[(&str, Vec<Range<u32>>)]) -> String
348348
s.push_str("\nfn main() {\n");
349349

350350
for (property, ranges) in ranges {
351-
s.push_str(&format!(r#" println!("Testing {}");"#, property));
351+
s.push_str(&format!(r#" println!("Testing {property}");"#));
352352
s.push('\n');
353353
s.push_str(&format!(" {}_true();\n", property.to_lowercase()));
354354
s.push_str(&format!(" {}_false();\n", property.to_lowercase()));
@@ -373,7 +373,7 @@ fn generate_tests(data_path: &str, ranges: &[(&str, Vec<Range<u32>>)]) -> String
373373
s.push_str(" }\n\n");
374374
}
375375

376-
s.push_str("}");
376+
s.push('}');
377377
s
378378
}
379379

@@ -388,7 +388,7 @@ fn generate_asserts(s: &mut String, property: &str, points: &[u32], truthy: bool
388388
range.start,
389389
));
390390
} else {
391-
s.push_str(&format!(" for chn in {:?}u32 {{\n", range));
391+
s.push_str(&format!(" for chn in {range:?}u32 {{\n"));
392392
s.push_str(&format!(
393393
" assert!({}unicode_data::{}::lookup(std::char::from_u32(chn).unwrap()), \"{{:?}}\", chn);\n",
394394
if truthy { "" } else { "!" },
@@ -439,7 +439,7 @@ fn merge_ranges(ranges: &mut Vec<Range<u32>>) {
439439
let mut last_end = None;
440440
for range in ranges {
441441
if let Some(last) = last_end {
442-
assert!(range.start > last, "{:?}", range);
442+
assert!(range.start > last, "{range:?}");
443443
}
444444
last_end = Some(range.end);
445445
}

src/tools/unicode-table-generator/src/raw_emitter.rs

Lines changed: 10 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -156,10 +156,10 @@ pub fn emit_codepoints(emitter: &mut RawEmitter, ranges: &[Range<u32>]) {
156156
emitter.blank_line();
157157

158158
let mut bitset = emitter.clone();
159-
let bitset_ok = bitset.emit_bitset(&ranges).is_ok();
159+
let bitset_ok = bitset.emit_bitset(ranges).is_ok();
160160

161161
let mut skiplist = emitter.clone();
162-
skiplist.emit_skiplist(&ranges);
162+
skiplist.emit_skiplist(ranges);
163163

164164
if bitset_ok && bitset.bytes_used <= skiplist.bytes_used {
165165
*emitter = bitset;
@@ -174,7 +174,7 @@ pub fn emit_whitespace(emitter: &mut RawEmitter, ranges: &[Range<u32>]) {
174174
emitter.blank_line();
175175

176176
let mut cascading = emitter.clone();
177-
cascading.emit_cascading_map(&ranges);
177+
cascading.emit_cascading_map(ranges);
178178
*emitter = cascading;
179179
emitter.desc = String::from("cascading");
180180
}
@@ -272,7 +272,7 @@ impl Canonicalized {
272272
// for canonical when possible.
273273
while let Some((&to, _)) = mappings
274274
.iter()
275-
.find(|(&to, _)| to == 0)
275+
.find(|&(&to, _)| to == 0)
276276
.or_else(|| mappings.iter().max_by_key(|m| m.1.len()))
277277
{
278278
// Get the mapping with the most entries. Currently, no mapping can
@@ -311,10 +311,9 @@ impl Canonicalized {
311311
}
312312
}
313313
}
314-
assert!(
315-
unique_mapping
316-
.insert(to, UniqueMapping::Canonical(canonical_words.len()))
317-
.is_none()
314+
assert_eq!(
315+
unique_mapping.insert(to, UniqueMapping::Canonical(canonical_words.len())),
316+
None
318317
);
319318
canonical_words.push(to);
320319

@@ -340,14 +339,10 @@ impl Canonicalized {
340339
// We'll probably always have some slack though so this loop will still
341340
// be needed.
342341
for &w in unique_words {
343-
if !unique_mapping.contains_key(&w) {
344-
assert!(
345-
unique_mapping
346-
.insert(w, UniqueMapping::Canonical(canonical_words.len()))
347-
.is_none()
348-
);
342+
unique_mapping.entry(w).or_insert_with(|| {
349343
canonical_words.push(w);
350-
}
344+
// `w` was pushed on the line above, so its index in `canonical_words` is the
// new length minus one. The pre-cleanup code read `len()` *before* pushing;
// using the post-push length here would store an index one past `w`.
UniqueMapping::Canonical(canonical_words.len() - 1)
345+
});
351346
}
352347
assert_eq!(canonicalized_words.len() + canonical_words.len(), unique_words.len());
353348
assert_eq!(unique_mapping.len(), unique_words.len());

0 commit comments

Comments
 (0)