mirror of
https://github.com/askama-rs/askama.git
synced 2025-09-28 13:30:59 +00:00
Re-use Html
escaping code to implement JSON escaping
This commit is contained in:
parent
84edf1cc77
commit
6ae689cbc3
@ -6,6 +6,6 @@ members = [
|
||||
"testing",
|
||||
"testing-alloc",
|
||||
"testing-no-std",
|
||||
"testing-renamed"
|
||||
"testing-renamed",
|
||||
]
|
||||
resolver = "2"
|
||||
|
@ -57,7 +57,7 @@ blocks = ["rinja_derive?/blocks"]
|
||||
code-in-doc = ["rinja_derive?/code-in-doc"]
|
||||
config = ["rinja_derive?/config"]
|
||||
derive = ["rinja_derive"]
|
||||
serde_json = ["rinja_derive?/serde_json", "dep:serde", "dep:serde_json"]
|
||||
serde_json = ["std", "rinja_derive?/serde_json", "dep:serde", "dep:serde_json"]
|
||||
std = [
|
||||
"alloc",
|
||||
"rinja_derive?/std",
|
||||
|
@ -114,6 +114,19 @@ impl AsciiChar {
|
||||
Self::new(ALPHABET[d as usize % ALPHABET.len()]),
|
||||
]
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub const fn two_hex_digits(d: u32) -> [Self; 2] {
|
||||
const ALPHABET: &[u8; 16] = b"0123456789abcdef";
|
||||
|
||||
if d >= ALPHABET.len().pow(2) as u32 {
|
||||
panic!();
|
||||
}
|
||||
[
|
||||
Self::new(ALPHABET[d as usize / ALPHABET.len()]),
|
||||
Self::new(ALPHABET[d as usize % ALPHABET.len()]),
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
mod _ascii_char {
|
||||
|
@ -4,9 +4,10 @@ use std::pin::Pin;
|
||||
use std::{fmt, io, str};
|
||||
|
||||
use serde::Serialize;
|
||||
use serde_json::ser::{PrettyFormatter, Serializer, to_writer};
|
||||
use serde_json::ser::{CompactFormatter, PrettyFormatter, Serializer};
|
||||
|
||||
use super::FastWritable;
|
||||
use crate::ascii_str::{AsciiChar, AsciiStr};
|
||||
|
||||
/// Serialize to JSON (requires `serde_json` feature)
|
||||
///
|
||||
@ -187,9 +188,8 @@ where
|
||||
}
|
||||
|
||||
impl<S: Serialize> FastWritable for ToJson<S> {
|
||||
#[inline]
|
||||
fn write_into<W: fmt::Write + ?Sized>(&self, f: &mut W) -> crate::Result<()> {
|
||||
fmt_json(f, &self.value)
|
||||
serialize(f, &self.value, CompactFormatter)
|
||||
}
|
||||
}
|
||||
|
||||
@ -201,9 +201,12 @@ impl<S: Serialize> fmt::Display for ToJson<S> {
|
||||
}
|
||||
|
||||
impl<S: Serialize, I: AsIndent> FastWritable for ToJsonPretty<S, I> {
|
||||
#[inline]
|
||||
fn write_into<W: fmt::Write + ?Sized>(&self, f: &mut W) -> crate::Result<()> {
|
||||
fmt_json_pretty(f, &self.value, self.indent.as_indent())
|
||||
serialize(
|
||||
f,
|
||||
&self.value,
|
||||
PrettyFormatter::with_indent(self.indent.as_indent().as_bytes()),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@ -214,59 +217,152 @@ impl<S: Serialize, I: AsIndent> fmt::Display for ToJsonPretty<S, I> {
|
||||
}
|
||||
}
|
||||
|
||||
fn fmt_json<S: Serialize, W: fmt::Write + ?Sized>(dest: &mut W, value: &S) -> crate::Result<()> {
|
||||
Ok(to_writer(JsonWriter(dest), value)?)
|
||||
}
|
||||
#[inline]
|
||||
fn serialize<S, W, F>(dest: &mut W, value: &S, formatter: F) -> Result<(), crate::Error>
|
||||
where
|
||||
S: Serialize + ?Sized,
|
||||
W: fmt::Write + ?Sized,
|
||||
F: serde_json::ser::Formatter,
|
||||
{
|
||||
/// The struct must only ever be used with the output of `serde_json`.
|
||||
/// `serde_json` only produces UTF-8 strings in its `io::Write::write()` calls,
|
||||
/// and `<JsonWriter as io::Write>` depends on this invariant.
|
||||
struct JsonWriter<'a, W: fmt::Write + ?Sized>(&'a mut W);
|
||||
|
||||
fn fmt_json_pretty<S: Serialize, W: fmt::Write + ?Sized>(
|
||||
dest: &mut W,
|
||||
value: &S,
|
||||
indent: &str,
|
||||
) -> crate::Result<()> {
|
||||
let formatter = PrettyFormatter::with_indent(indent.as_bytes());
|
||||
let mut serializer = Serializer::with_formatter(JsonWriter(dest), formatter);
|
||||
Ok(value.serialize(&mut serializer)?)
|
||||
}
|
||||
|
||||
struct JsonWriter<'a, W: fmt::Write + ?Sized>(&'a mut W);
|
||||
|
||||
impl<W: fmt::Write + ?Sized> io::Write for JsonWriter<'_, W> {
|
||||
impl<W: fmt::Write + ?Sized> io::Write for JsonWriter<'_, W> {
|
||||
/// Invariant: must be passed valid UTF-8 slices
|
||||
#[inline]
|
||||
fn write(&mut self, bytes: &[u8]) -> io::Result<usize> {
|
||||
self.write_all(bytes)?;
|
||||
Ok(bytes.len())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
/// Invariant: must be passed valid UTF-8 slices
|
||||
fn write_all(&mut self, bytes: &[u8]) -> io::Result<()> {
|
||||
write(self.0, bytes).map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))
|
||||
// SAFETY: `serde_json` only writes valid strings
|
||||
let string = unsafe { std::str::from_utf8_unchecked(bytes) };
|
||||
write_escaped_str(&mut *self.0, string)
|
||||
.map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn flush(&mut self) -> io::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn write<W: fmt::Write + ?Sized>(f: &mut W, bytes: &[u8]) -> fmt::Result {
|
||||
/// Invariant: no character that needs escaping is a multi-byte character when encoded in UTF-8;
|
||||
/// that is true for characters in ASCII range.
|
||||
#[inline]
|
||||
fn write_escaped_str(dest: &mut (impl fmt::Write + ?Sized), src: &str) -> fmt::Result {
|
||||
// This implementation reads one byte after another.
|
||||
// It's not very fast, but should work well enough until portable SIMD gets stabilized.
|
||||
|
||||
let mut escaped_buf = ESCAPED_BUF_INIT;
|
||||
let mut last = 0;
|
||||
for (index, byte) in bytes.iter().enumerate() {
|
||||
let escaped = match byte {
|
||||
b'&' => Some(br"\u0026"),
|
||||
b'\'' => Some(br"\u0027"),
|
||||
b'<' => Some(br"\u003c"),
|
||||
b'>' => Some(br"\u003e"),
|
||||
_ => None,
|
||||
};
|
||||
if let Some(escaped) = escaped {
|
||||
f.write_str(unsafe { str::from_utf8_unchecked(&bytes[last..index]) })?;
|
||||
f.write_str(unsafe { str::from_utf8_unchecked(escaped) })?;
|
||||
|
||||
for (index, byte) in src.bytes().enumerate() {
|
||||
if let Some(escaped) = get_escaped(byte) {
|
||||
[escaped_buf[4], escaped_buf[5]] = escaped;
|
||||
write_str_if_nonempty(dest, &src[last..index])?;
|
||||
dest.write_str(AsciiStr::from_slice(&escaped_buf[..ESCAPED_BUF_LEN]))?;
|
||||
last = index + 1;
|
||||
}
|
||||
}
|
||||
f.write_str(unsafe { str::from_utf8_unchecked(&bytes[last..]) })
|
||||
write_str_if_nonempty(dest, &src[last..])
|
||||
}
|
||||
|
||||
let mut serializer = Serializer::with_formatter(JsonWriter(dest), formatter);
|
||||
Ok(value.serialize(&mut serializer)?)
|
||||
}
|
||||
|
||||
/// Returns the decimal representation of the codepoint if the character needs HTML escaping.
|
||||
#[inline]
|
||||
fn get_escaped(byte: u8) -> Option<[AsciiChar; 2]> {
|
||||
const _: () = assert!(CHAR_RANGE < 32);
|
||||
|
||||
if let MIN_CHAR..=MAX_CHAR = byte {
|
||||
if (1u32 << (byte - MIN_CHAR)) & BITS != 0 {
|
||||
return Some(TABLE.0[(byte - MIN_CHAR) as usize]);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Forwards `input` to `output`, but skips the `write_str()` call altogether for an empty string.
///
/// Returns whatever `output.write_str()` returned, or `Ok(())` if nothing had to be written.
#[inline(always)]
fn write_str_if_nonempty(output: &mut (impl fmt::Write + ?Sized), input: &str) -> fmt::Result {
    if input.is_empty() {
        Ok(())
    } else {
        output.write_str(input)
    }
}
|
||||
|
||||
/// List of characters that are written as a `\u00XX` escape sequence instead of verbatim,
/// not necessarily in ordinal order.
const CHARS: &[u8] = br#"&'<>"#;

/// The character with the lowest codepoint that needs escaping.
const MIN_CHAR: u8 = {
    let mut lowest = u8::MAX;
    let mut i = 0;
    // Iterators are not usable in constant evaluation, hence the manual loop.
    while i < CHARS.len() {
        if CHARS[i] < lowest {
            lowest = CHARS[i];
        }
        i += 1;
    }
    lowest
};

/// The character with the highest codepoint that needs escaping.
const MAX_CHAR: u8 = {
    let mut highest = u8::MIN;
    let mut i = 0;
    while i < CHARS.len() {
        if CHARS[i] > highest {
            highest = CHARS[i];
        }
        i += 1;
    }
    highest
};

/// Bit set of the characters that need escaping: bit `c - MIN_CHAR` is set for every `c` in
/// [`CHARS`].
const BITS: u32 = {
    let mut bits = 0u32;
    let mut i = 0;
    while i < CHARS.len() {
        bits |= 1u32 << (CHARS[i] - MIN_CHAR);
        i += 1;
    }
    bits
};

/// Number of codepoints between the lowest and highest character that needs escaping, inclusive.
const CHAR_RANGE: usize = (MAX_CHAR - MIN_CHAR + 1) as usize;
|
||||
|
||||
#[repr(align(64))]
|
||||
struct Table([[AsciiChar; 2]; CHAR_RANGE]);
|
||||
|
||||
/// For characters that need HTML escaping, the codepoint is formatted as decimal digits,
|
||||
/// otherwise `b"\0\0"`. Starting at [`MIN_CHAR`].
|
||||
const TABLE: &Table = &{
|
||||
let mut table = Table([UNESCAPED; CHAR_RANGE]);
|
||||
let mut i = 0;
|
||||
while i < CHARS.len() {
|
||||
let c = CHARS[i];
|
||||
table.0[c as u32 as usize - MIN_CHAR as usize] = AsciiChar::two_hex_digits(c as u32);
|
||||
i += 1;
|
||||
}
|
||||
table
|
||||
};
|
||||
|
||||
const UNESCAPED: [AsciiChar; 2] = AsciiStr::new_sized("");
|
||||
|
||||
const ESCAPED_BUF_INIT_UNPADDED: &str = "\\u00__";
|
||||
// RATIONALE: llvm generates better code if the buffer is register sized
|
||||
const ESCAPED_BUF_INIT: [AsciiChar; 8] = AsciiStr::new_sized(ESCAPED_BUF_INIT_UNPADDED);
|
||||
const ESCAPED_BUF_LEN: usize = ESCAPED_BUF_INIT_UNPADDED.len();
|
||||
|
||||
#[cfg(all(test, feature = "alloc"))]
|
||||
mod tests {
|
||||
use alloc::string::ToString;
|
||||
|
Loading…
x
Reference in New Issue
Block a user