Touch up PR 1175

This commit is contained in:
David Tolnay 2024-08-14 22:29:30 -07:00
parent 0f942e5b52
commit cc7a1608c9
No known key found for this signature in database
GPG Key ID: F9BA143B95FF6D82

View File

@@ -877,15 +877,13 @@ fn parse_escape<'de, R: Read<'de>>(
b'r' => scratch.push(b'\r'), b'r' => scratch.push(b'\r'),
b't' => scratch.push(b'\t'), b't' => scratch.push(b'\t'),
b'u' => return parse_unicode_escape(read, validate, scratch), b'u' => return parse_unicode_escape(read, validate, scratch),
_ => { _ => return error(read, ErrorCode::InvalidEscape),
return error(read, ErrorCode::InvalidEscape);
}
} }
Ok(()) Ok(())
} }
/// Parses a JSON \u escape and appends it into the scratch space. Assumes \u /// Parses a JSON \u escape and appends it into the scratch space. Assumes `\u`
/// has just been read. /// has just been read.
#[cold] #[cold]
fn parse_unicode_escape<'de, R: Read<'de>>( fn parse_unicode_escape<'de, R: Read<'de>>(
@@ -895,10 +893,10 @@ fn parse_unicode_escape<'de, R: Read<'de>>(
) -> Result<()> { ) -> Result<()> {
let mut n = tri!(read.decode_hex_escape()); let mut n = tri!(read.decode_hex_escape());
// Non-BMP characters are encoded as a sequence of two hex // Non-BMP characters are encoded as a sequence of two hex escapes,
// escapes, representing UTF-16 surrogates. If deserializing a // representing UTF-16 surrogates. If deserializing a utf-8 string the
// utf-8 string the surrogates are required to be paired, // surrogates are required to be paired, whereas deserializing a byte string
// whereas deserializing a byte string accepts lone surrogates. // accepts lone surrogates.
if validate && n >= 0xDC00 && n <= 0xDFFF { if validate && n >= 0xDC00 && n <= 0xDFFF {
// XXX: This is actually a trailing surrogate. // XXX: This is actually a trailing surrogate.
return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape); return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
@@ -935,11 +933,10 @@ fn parse_unicode_escape<'de, R: Read<'de>>(
error(read, ErrorCode::UnexpectedEndOfHexEscape) error(read, ErrorCode::UnexpectedEndOfHexEscape)
} else { } else {
push_wtf8_codepoint(n1 as u32, scratch); push_wtf8_codepoint(n1 as u32, scratch);
// The \ prior to this byte started an escape sequence, // The \ prior to this byte started an escape sequence, so we
// so we need to parse that now. This recursive call // need to parse that now. This recursive call does not blow the
// does not blow the stack on malicious input because // stack on malicious input because the escape is not \u, so it
// the escape is not \u, so it will be handled by one // will be handled by one of the easy nonrecursive cases.
// of the easy nonrecursive cases.
parse_escape(read, validate, scratch) parse_escape(read, validate, scratch)
}; };
} }
@@ -956,8 +953,8 @@ fn parse_unicode_escape<'de, R: Read<'de>>(
continue; continue;
} }
// This value is in range U+10000..=U+10FFFF, which is always a // This value is in range U+10000..=U+10FFFF, which is always a valid
// valid codepoint. // codepoint.
let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000; let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000;
push_wtf8_codepoint(n, scratch); push_wtf8_codepoint(n, scratch);
return Ok(()); return Ok(());