mirror of
https://github.com/askama-rs/askama.git
synced 2025-09-29 22:11:17 +00:00
Merge pull request #301 from Kijewski/pr-digits
Speed-up HTML escaping a bit
This commit is contained in:
commit
0373645eb9
@ -41,13 +41,10 @@ pub(crate) fn write_escaped_char(mut dest: impl fmt::Write, c: char) -> fmt::Res
|
||||
}
|
||||
|
||||
/// Returns the decimal representation of the codepoint if the character needs HTML escaping.
|
||||
#[inline(always)]
|
||||
#[inline]
|
||||
fn get_escaped(byte: u8) -> Option<[u8; 2]> {
|
||||
match byte {
|
||||
MIN_CHAR..=MAX_CHAR => match TABLE.lookup[(byte - MIN_CHAR) as usize] {
|
||||
0 => None,
|
||||
escaped => Some(escaped.to_ne_bytes()),
|
||||
},
|
||||
MIN_CHAR..=MAX_CHAR => Some(TABLE.0[(byte - MIN_CHAR) as usize]?.to_bytes()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@ -93,24 +90,17 @@ const MAX_CHAR: u8 = {
|
||||
/// Number of codepoints between the lowest and highest character that needs escaping, incl.
|
||||
const CHAR_RANGE: usize = (MAX_CHAR - MIN_CHAR + 1) as usize;
|
||||
|
||||
struct Table {
|
||||
_align: [usize; 0],
|
||||
lookup: [u16; CHAR_RANGE],
|
||||
}
|
||||
#[repr(align(64))]
|
||||
struct Table([Option<Digits>; CHAR_RANGE]);
|
||||
|
||||
/// For characters that need HTML escaping, the codepoint is formatted as decimal digits,
|
||||
/// otherwise `b"\0\0"`. Starting at [`MIN_CHAR`].
|
||||
const TABLE: Table = {
|
||||
let mut table = Table {
|
||||
_align: [],
|
||||
lookup: [0; CHAR_RANGE],
|
||||
};
|
||||
const TABLE: &Table = &{
|
||||
let mut table = Table([None; CHAR_RANGE]);
|
||||
let mut i = 0;
|
||||
while i < CHARS.len() {
|
||||
let c = CHARS[i];
|
||||
let h = c / 10 + b'0';
|
||||
let l = c % 10 + b'0';
|
||||
table.lookup[(c - MIN_CHAR) as usize] = u16::from_ne_bytes([h, l]);
|
||||
table.0[(c - MIN_CHAR) as usize] = Some(Digits::new(c));
|
||||
i += 1;
|
||||
}
|
||||
table
|
||||
@ -120,6 +110,45 @@ const TABLE: Table = {
|
||||
const ESCAPED_BUF_INIT: [u8; 8] = *b"&#__;\0\0\0";
|
||||
const ESCAPED_BUF_LEN: usize = b"&#__;".len();
|
||||
|
||||
/// All possible decimal representations of codepoints that need escaping in HTML / XML.
|
||||
///
|
||||
/// Using this type instead of e.g. `Option<NonZeroU16>` allows rustc to select any and all of the
|
||||
/// 65,531 unused representations of this type as niche, which can help speed up the
|
||||
/// generated code. If not all needed representations were present, then the `TABLE` definition
|
||||
/// would fail to compile.
|
||||
///
/// Each discriminant is the native-endian `u16` built from the two ASCII digits of the
/// codepoint, so casting a variant to `u16` and calling `to_ne_bytes()` yields those digits
/// again in their original order.
#[derive(Debug, Clone, Copy)]
|
||||
#[repr(u16)]
|
||||
enum Digits {
|
||||
/// `'"'`
|
||||
V34 = u16::from_ne_bytes(*b"34"),
|
||||
/// `'&'`
|
||||
V38 = u16::from_ne_bytes(*b"38"),
|
||||
/// `'\''`
|
||||
V39 = u16::from_ne_bytes(*b"39"),
|
||||
/// `'<'`
|
||||
V60 = u16::from_ne_bytes(*b"60"),
|
||||
/// `'>'`
|
||||
V62 = u16::from_ne_bytes(*b"62"),
|
||||
}
|
||||
|
||||
impl Digits {
|
||||
/// Returns the two ASCII decimal digits of the codepoint, e.g. `*b"34"` for [`Digits::V34`].
///
/// The discriminants are defined with `u16::from_ne_bytes`, so converting back with
/// `to_ne_bytes` restores the digits in their original order.
#[inline]
|
||||
const fn to_bytes(self) -> [u8; 2] {
|
||||
(self as u16).to_ne_bytes()
|
||||
}
|
||||
|
||||
/// Maps the numeric value of a codepoint that needs escaping to its [`Digits`] variant.
///
/// # Panics
///
/// Panics for any value other than 34, 38, 39, 60 or 62. When this is evaluated in a
/// constant context the panic is reported as a compilation error.
const fn new(v: u8) -> Self {
|
||||
match v {
|
||||
34 => Self::V34,
|
||||
38 => Self::V38,
|
||||
39 => Self::V39,
|
||||
60 => Self::V60,
|
||||
62 => Self::V62,
|
||||
// No variant exists for this value, i.e. it is not a character that gets escaped.
_ => panic!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(feature = "alloc")]
|
||||
fn test_simple_html_string_escaping() {
|
||||
|
Loading…
x
Reference in New Issue
Block a user