mirror of
https://github.com/serde-rs/json.git
synced 2025-10-04 16:25:42 +00:00
Use the same UTF-8/WTF-8 impl for surrogates
This does not affect performance.
This commit is contained in:
parent
0e90b61b8c
commit
2f28d106e6
25
src/read.rs
25
src/read.rs
@ -898,20 +898,12 @@ fn parse_unicode_escape<'de, R: Read<'de>>(
|
|||||||
validate: bool,
|
validate: bool,
|
||||||
scratch: &mut Vec<u8>,
|
scratch: &mut Vec<u8>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
fn encode_surrogate(scratch: &mut Vec<u8>, n: u16) {
|
|
||||||
scratch.extend_from_slice(&[
|
|
||||||
(n >> 12 & 0b0000_1111) as u8 | 0b1110_0000,
|
|
||||||
(n >> 6 & 0b0011_1111) as u8 | 0b1000_0000,
|
|
||||||
(n & 0b0011_1111) as u8 | 0b1000_0000,
|
|
||||||
]);
|
|
||||||
}
|
|
||||||
|
|
||||||
let c = match tri!(read.decode_hex_escape()) {
|
let c = match tri!(read.decode_hex_escape()) {
|
||||||
n @ 0xDC00..=0xDFFF => {
|
n @ 0xDC00..=0xDFFF => {
|
||||||
return if validate {
|
return if validate {
|
||||||
error(read, ErrorCode::LoneLeadingSurrogateInHexEscape)
|
error(read, ErrorCode::LoneLeadingSurrogateInHexEscape)
|
||||||
} else {
|
} else {
|
||||||
encode_surrogate(scratch, n);
|
push_wtf8_codepoint(n as u32, scratch);
|
||||||
Ok(())
|
Ok(())
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -928,7 +920,7 @@ fn parse_unicode_escape<'de, R: Read<'de>>(
|
|||||||
read.discard();
|
read.discard();
|
||||||
error(read, ErrorCode::UnexpectedEndOfHexEscape)
|
error(read, ErrorCode::UnexpectedEndOfHexEscape)
|
||||||
} else {
|
} else {
|
||||||
encode_surrogate(scratch, n1);
|
push_wtf8_codepoint(n1 as u32, scratch);
|
||||||
Ok(())
|
Ok(())
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -940,7 +932,7 @@ fn parse_unicode_escape<'de, R: Read<'de>>(
|
|||||||
read.discard();
|
read.discard();
|
||||||
error(read, ErrorCode::UnexpectedEndOfHexEscape)
|
error(read, ErrorCode::UnexpectedEndOfHexEscape)
|
||||||
} else {
|
} else {
|
||||||
encode_surrogate(scratch, n1);
|
push_wtf8_codepoint(n1 as u32, scratch);
|
||||||
// The \ prior to this byte started an escape sequence,
|
// The \ prior to this byte started an escape sequence,
|
||||||
// so we need to parse that now. This recursive call
|
// so we need to parse that now. This recursive call
|
||||||
// does not blow the stack on malicious input because
|
// does not blow the stack on malicious input because
|
||||||
@ -966,17 +958,14 @@ fn parse_unicode_escape<'de, R: Read<'de>>(
|
|||||||
n => n as u32,
|
n => n as u32,
|
||||||
};
|
};
|
||||||
|
|
||||||
// SAFETY: c is always a codepoint.
|
push_wtf8_codepoint(c, scratch);
|
||||||
unsafe {
|
|
||||||
push_utf8_codepoint(c, scratch);
|
|
||||||
}
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Adds a UTF-8 codepoint to the end of the buffer. This is a more efficient
|
/// Adds a WTF-8 codepoint to the end of the buffer. This is a more efficient
|
||||||
/// implementation of String::push. n must be a valid codepoint.
|
/// implementation of String::push. The codepoint may be a surrogate.
|
||||||
#[inline]
|
#[inline]
|
||||||
unsafe fn push_utf8_codepoint(n: u32, scratch: &mut Vec<u8>) {
|
fn push_wtf8_codepoint(n: u32, scratch: &mut Vec<u8>) {
|
||||||
if n < 0x80 {
|
if n < 0x80 {
|
||||||
scratch.push(n as u8);
|
scratch.push(n as u8);
|
||||||
return;
|
return;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user