Skip to content

Commit fc0ca07

Browse files
authored
Merge pull request #831 from serde-rs/from_u32
Skip error codepath on hex escape outside the surrogate range
2 parents 76e376c + 4a0924c commit fc0ca07

File tree

1 file changed

+26
-33
lines changed

1 file changed

+26
-33
lines changed

src/read.rs

Lines changed: 26 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -926,12 +926,9 @@ fn parse_escape<'de, R: Read<'de>>(
926926
}
927927
}
928928

929-
n => match char::from_u32(n as u32) {
930-
Some(c) => c,
931-
None => {
932-
return error(read, ErrorCode::InvalidUnicodeCodePoint);
933-
}
934-
},
929+
// Every u16 outside of the surrogate ranges above is guaranteed
930+
// to be a legal char.
931+
n => char::from_u32(n as u32).unwrap(),
935932
};
936933

937934
scratch.extend_from_slice(c.encode_utf8(&mut [0_u8; 4]).as_bytes());
@@ -954,38 +951,34 @@ where
954951

955952
match ch {
956953
b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => {}
957-
b'u' => {
958-
let n = match tri!(read.decode_hex_escape()) {
959-
0xDC00..=0xDFFF => {
960-
return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
961-
}
962-
963-
// Non-BMP characters are encoded as a sequence of
964-
// two hex escapes, representing UTF-16 surrogates.
965-
n1 @ 0xD800..=0xDBFF => {
966-
if tri!(next_or_eof(read)) != b'\\' {
967-
return error(read, ErrorCode::UnexpectedEndOfHexEscape);
968-
}
969-
if tri!(next_or_eof(read)) != b'u' {
970-
return error(read, ErrorCode::UnexpectedEndOfHexEscape);
971-
}
972-
973-
let n2 = tri!(read.decode_hex_escape());
974-
975-
if n2 < 0xDC00 || n2 > 0xDFFF {
976-
return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
977-
}
954+
b'u' => match tri!(read.decode_hex_escape()) {
955+
0xDC00..=0xDFFF => {
956+
return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
957+
}
978958

979-
(((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000
959+
// Non-BMP characters are encoded as a sequence of
960+
// two hex escapes, representing UTF-16 surrogates.
961+
n1 @ 0xD800..=0xDBFF => {
962+
if tri!(next_or_eof(read)) != b'\\' {
963+
return error(read, ErrorCode::UnexpectedEndOfHexEscape);
964+
}
965+
if tri!(next_or_eof(read)) != b'u' {
966+
return error(read, ErrorCode::UnexpectedEndOfHexEscape);
980967
}
981968

982-
n => n as u32,
983-
};
969+
let n2 = tri!(read.decode_hex_escape());
970+
if n2 < 0xDC00 || n2 > 0xDFFF {
971+
return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
972+
}
984973

985-
if char::from_u32(n).is_none() {
986-
return error(read, ErrorCode::InvalidUnicodeCodePoint);
974+
let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000;
975+
if char::from_u32(n).is_none() {
976+
return error(read, ErrorCode::InvalidUnicodeCodePoint);
977+
}
987978
}
988-
}
979+
980+
_ => {}
981+
},
989982
_ => {
990983
return error(read, ErrorCode::InvalidEscape);
991984
}

0 commit comments

Comments
 (0)