mirror of
https://github.com/serde-rs/json.git
synced 2025-10-02 15:26:00 +00:00
Format UTF-8 strings manually
This speeds up War and Peace 290 MB/s -> 330 MB/s (+15%).
This commit is contained in:
parent
a38dbf3708
commit
0e90b61b8c
60
src/read.rs
60
src/read.rs
@ -1,6 +1,5 @@
|
||||
use crate::error::{Error, ErrorCode, Result};
|
||||
use alloc::vec::Vec;
|
||||
use core::char;
|
||||
use core::cmp;
|
||||
use core::mem;
|
||||
use core::ops::Deref;
|
||||
@ -957,25 +956,64 @@ fn parse_unicode_escape<'de, R: Read<'de>>(
|
||||
return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
|
||||
}
|
||||
|
||||
let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000;
|
||||
|
||||
match char::from_u32(n) {
|
||||
Some(c) => c,
|
||||
None => {
|
||||
return error(read, ErrorCode::InvalidUnicodeCodePoint);
|
||||
}
|
||||
}
|
||||
// This value is in range U+10000..=U+10FFFF, which is always a
|
||||
// valid codepoint.
|
||||
(((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000
|
||||
}
|
||||
|
||||
// Every u16 outside of the surrogate ranges above is guaranteed
|
||||
// to be a legal char.
|
||||
n => char::from_u32(n as u32).unwrap(),
|
||||
n => n as u32,
|
||||
};
|
||||
|
||||
scratch.extend_from_slice(c.encode_utf8(&mut [0_u8; 4]).as_bytes());
|
||||
// SAFETY: c is always a codepoint.
|
||||
unsafe {
|
||||
push_utf8_codepoint(c, scratch);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Adds a UTF-8 codepoint to the end of the buffer. This is a more efficient
|
||||
/// implementation of String::push. n must be a valid codepoint.
|
||||
#[inline]
|
||||
unsafe fn push_utf8_codepoint(n: u32, scratch: &mut Vec<u8>) {
|
||||
if n < 0x80 {
|
||||
scratch.push(n as u8);
|
||||
return;
|
||||
}
|
||||
|
||||
scratch.reserve(4);
|
||||
|
||||
unsafe {
|
||||
let ptr = scratch.as_mut_ptr().add(scratch.len());
|
||||
|
||||
let encoded_len = match n {
|
||||
0..=0x7F => unreachable!(),
|
||||
0x80..=0x7FF => {
|
||||
ptr.write((n >> 6 & 0b0001_1111) as u8 | 0b1100_0000);
|
||||
2
|
||||
}
|
||||
0x800..=0xFFFF => {
|
||||
ptr.write((n >> 12 & 0b0000_1111) as u8 | 0b1110_0000);
|
||||
ptr.add(1).write((n >> 6 & 0b0011_1111) as u8 | 0b1000_0000);
|
||||
3
|
||||
}
|
||||
0x1_0000..=0x10_FFFF => {
|
||||
ptr.write((n >> 18 & 0b0000_0111) as u8 | 0b1111_0000);
|
||||
ptr.add(1)
|
||||
.write((n >> 12 & 0b0011_1111) as u8 | 0b1000_0000);
|
||||
ptr.add(2).write((n >> 6 & 0b0011_1111) as u8 | 0b1000_0000);
|
||||
4
|
||||
}
|
||||
0x11_0000.. => unreachable!(),
|
||||
};
|
||||
ptr.add(encoded_len - 1)
|
||||
.write((n & 0b0011_1111) as u8 | 0b1000_0000);
|
||||
|
||||
scratch.set_len(scratch.len() + encoded_len);
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses a JSON escape sequence and discards the value. Assumes the previous
|
||||
/// byte read was a backslash.
|
||||
fn ignore_escape<'de, R>(read: &mut R) -> Result<()>
|
||||
|
Loading…
x
Reference in New Issue
Block a user