mirror of
https://github.com/serde-rs/json.git
synced 2025-09-30 14:32:05 +00:00
Merge pull request #830 from lucacasonato/support_lone_surrogates_in_raw_value
Allow lone surrogates in raw values
This commit is contained in:
commit
7e56a406e5
35
src/read.rs
35
src/read.rs
@ -954,34 +954,15 @@ where
|
||||
|
||||
match ch {
|
||||
b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => {}
|
||||
b'u' => match tri!(read.decode_hex_escape()) {
|
||||
0xDC00..=0xDFFF => {
|
||||
return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
|
||||
}
|
||||
b'u' => {
|
||||
// At this point we don't care if the codepoint is valid. We just
|
||||
// want to consume it. We don't actually know what is valid or not
|
||||
// at this point, because that depends on if this string will
|
||||
// ultimately be parsed into a string or a byte buffer in the "real"
|
||||
// parse.
|
||||
|
||||
// Non-BMP characters are encoded as a sequence of
|
||||
// two hex escapes, representing UTF-16 surrogates.
|
||||
n1 @ 0xD800..=0xDBFF => {
|
||||
if tri!(next_or_eof(read)) != b'\\' {
|
||||
return error(read, ErrorCode::UnexpectedEndOfHexEscape);
|
||||
}
|
||||
if tri!(next_or_eof(read)) != b'u' {
|
||||
return error(read, ErrorCode::UnexpectedEndOfHexEscape);
|
||||
}
|
||||
|
||||
let n2 = tri!(read.decode_hex_escape());
|
||||
if n2 < 0xDC00 || n2 > 0xDFFF {
|
||||
return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
|
||||
}
|
||||
|
||||
let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000;
|
||||
if char::from_u32(n).is_none() {
|
||||
return error(read, ErrorCode::InvalidUnicodeCodePoint);
|
||||
}
|
||||
}
|
||||
|
||||
_ => {}
|
||||
},
|
||||
tri!(read.decode_hex_escape());
|
||||
}
|
||||
_ => {
|
||||
return error(read, ErrorCode::InvalidEscape);
|
||||
}
|
||||
|
@ -1740,6 +1740,20 @@ fn test_byte_buf_de_lone_surrogate() {
|
||||
assert!(res.is_err());
|
||||
}
|
||||
|
||||
#[cfg(feature = "raw_value")]
|
||||
#[test]
|
||||
fn test_raw_de_lone_surrogate() {
|
||||
use serde_json::value::RawValue;
|
||||
|
||||
assert!(from_str::<Box<RawValue>>(r#""\ud83c""#).is_ok());
|
||||
assert!(from_str::<Box<RawValue>>(r#""\ud83c\n""#).is_ok());
|
||||
assert!(from_str::<Box<RawValue>>(r#""\ud83c ""#).is_ok());
|
||||
assert!(from_str::<Box<RawValue>>(r#""\udc01 ""#).is_ok());
|
||||
assert!(from_str::<Box<RawValue>>(r#""\udc01\!""#).is_err());
|
||||
assert!(from_str::<Box<RawValue>>(r#""\udc01\u""#).is_err());
|
||||
assert!(from_str::<Box<RawValue>>(r#""\ud83c\ud83c""#).is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_byte_buf_de_multiple() {
|
||||
let s: Vec<ByteBuf> = from_str(r#"["ab\nc", "cd\ne"]"#).unwrap();
|
||||
|
Loading…
x
Reference in New Issue
Block a user