mirror of
https://github.com/serde-rs/json.git
synced 2025-10-02 23:35:59 +00:00
Allow lone surrogates in raw values
This commit is contained in:
parent
fc0ca07b10
commit
51e9616dee
33
src/read.rs
33
src/read.rs
@ -951,34 +951,15 @@ where
|
|||||||
|
|
||||||
match ch {
|
match ch {
|
||||||
b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => {}
|
b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => {}
|
||||||
b'u' => match tri!(read.decode_hex_escape()) {
|
b'u' => {
|
||||||
0xDC00..=0xDFFF => {
|
// At this point we don't care if the codepoint is valid. We just
|
||||||
return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
|
// want to consume it. We don't actually know what is valid or not
|
||||||
}
|
// at this point, because that depends on if this string will
|
||||||
|
// ultimately be parsed into a string or a byte buffer in the "real"
|
||||||
|
// parse.
|
||||||
|
|
||||||
// Non-BMP characters are encoded as a sequence of
|
tri!(read.decode_hex_escape());
|
||||||
// two hex escapes, representing UTF-16 surrogates.
|
|
||||||
n1 @ 0xD800..=0xDBFF => {
|
|
||||||
if tri!(next_or_eof(read)) != b'\\' {
|
|
||||||
return error(read, ErrorCode::UnexpectedEndOfHexEscape);
|
|
||||||
}
|
}
|
||||||
if tri!(next_or_eof(read)) != b'u' {
|
|
||||||
return error(read, ErrorCode::UnexpectedEndOfHexEscape);
|
|
||||||
}
|
|
||||||
|
|
||||||
let n2 = tri!(read.decode_hex_escape());
|
|
||||||
if n2 < 0xDC00 || n2 > 0xDFFF {
|
|
||||||
return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
|
|
||||||
}
|
|
||||||
|
|
||||||
let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000;
|
|
||||||
if char::from_u32(n).is_none() {
|
|
||||||
return error(read, ErrorCode::InvalidUnicodeCodePoint);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
_ => {}
|
|
||||||
},
|
|
||||||
_ => {
|
_ => {
|
||||||
return error(read, ErrorCode::InvalidEscape);
|
return error(read, ErrorCode::InvalidEscape);
|
||||||
}
|
}
|
||||||
|
@ -1742,6 +1742,20 @@ fn test_byte_buf_de_lone_surrogate() {
|
|||||||
assert!(res.is_err());
|
assert!(res.is_err());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "raw_value")]
|
||||||
|
#[test]
|
||||||
|
fn test_raw_de_lone_surrogate() {
|
||||||
|
use serde_json::value::RawValue;
|
||||||
|
|
||||||
|
assert!(from_str::<Box<RawValue>>(r#""\ud83c""#).is_ok());
|
||||||
|
assert!(from_str::<Box<RawValue>>(r#""\ud83c\n""#).is_ok());
|
||||||
|
assert!(from_str::<Box<RawValue>>(r#""\ud83c ""#).is_ok());
|
||||||
|
assert!(from_str::<Box<RawValue>>(r#""\udc01 ""#).is_ok());
|
||||||
|
assert!(from_str::<Box<RawValue>>(r#""\udc01\!""#).is_err());
|
||||||
|
assert!(from_str::<Box<RawValue>>(r#""\udc01\u""#).is_err());
|
||||||
|
assert!(from_str::<Box<RawValue>>(r#""\ud83c\ud83c""#).is_ok());
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_byte_buf_de_multiple() {
|
fn test_byte_buf_de_multiple() {
|
||||||
let s: Vec<ByteBuf> = from_str(r#"["ab\nc", "cd\ne"]"#).unwrap();
|
let s: Vec<ByteBuf> = from_str(r#"["ab\nc", "cd\ne"]"#).unwrap();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user