mirror of
https://github.com/askama-rs/askama.git
synced 2025-10-02 15:25:19 +00:00
parser: use memchr
to speed up skip_till()
`skip_till()` is used in the parser to find the next block `{%`, comment `{#` or expression `{{`. At every character position, it is tested whether one of these three substrings follows. Using [`memchr3()`], we could at least skip to the next candidate, a `{`. The syntax for blocks, comments and expressions can be modified by the user, but that does not matter much; we can simply supply `memchr3()` with the first byte in each of these strings. [`memchr3()`]: <https://docs.rs/memchr/2.7.4/memchr/fn.memchr3.html> ```text librustdoc/all time: [366.54 µs 366.78 µs 367.02 µs] thrpt: [38.475 MiB/s 38.500 MiB/s 38.525 MiB/s] change: time: [-17.358% -17.065% -16.820%] (p = 0.00 < 0.05) thrpt: [+20.221% +20.576% +21.004%] Performance has improved. librustdoc/item_info time: [6.3315 µs 6.3400 µs 6.3495 µs] thrpt: [24.783 MiB/s 24.820 MiB/s 24.853 MiB/s] change: time: [-6.5547% -6.4090% -6.2633%] (p = 0.00 < 0.05) thrpt: [+6.6818% +6.8479% +7.0144%] Performance has improved. librustdoc/item_union time: [39.377 µs 39.551 µs 39.720 µs] thrpt: [24.850 MiB/s 24.957 MiB/s 25.067 MiB/s] change: time: [-6.9834% -6.2455% -5.5849%] (p = 0.00 < 0.05) thrpt: [+5.9153% +6.6616% +7.5077%] Performance has improved. librustdoc/page time: [170.83 µs 170.99 µs 171.23 µs] thrpt: [36.164 MiB/s 36.213 MiB/s 36.248 MiB/s] change: time: [-12.413% -12.183% -11.968%] (p = 0.00 < 0.05) thrpt: [+13.595% +13.873% +14.173%] Performance has improved. librustdoc/print_item time: [21.163 µs 21.234 µs 21.322 µs] thrpt: [44.280 MiB/s 44.463 MiB/s 44.612 MiB/s] change: time: [-19.848% -18.613% -17.491%] (p = 0.00 < 0.05) thrpt: [+21.198% +22.870% +24.763%] Performance has improved. librustdoc/short_item_info time: [19.781 µs 19.813 µs 19.846 µs] thrpt: [45.652 MiB/s 45.727 MiB/s 45.801 MiB/s] change: time: [-18.027% -17.806% -17.574%] (p = 0.00 < 0.05) thrpt: [+21.321% +21.663% +21.991%] Performance has improved. 
librustdoc/sidebar time: [40.694 µs 40.806 µs 40.957 µs] thrpt: [30.131 MiB/s 30.242 MiB/s 30.325 MiB/s] change: time: [-14.698% -14.069% -13.456%] (p = 0.00 < 0.05) thrpt: [+15.548% +16.372% +17.231%] Performance has improved. librustdoc/source time: [15.249 µs 15.264 µs 15.278 µs] thrpt: [48.251 MiB/s 48.295 MiB/s 48.343 MiB/s] change: time: [-25.832% -25.678% -25.532%] (p = 0.00 < 0.05) thrpt: [+34.285% +34.550% +34.829%] Performance has improved. librustdoc/type_layout_size time: [9.0168 µs 9.0446 µs 9.0789 µs] thrpt: [29.832 MiB/s 29.945 MiB/s 30.038 MiB/s] change: time: [-11.100% -10.437% -9.4426%] (p = 0.00 < 0.05) thrpt: [+10.427% +11.653% +12.486%] Performance has improved. librustdoc/type_layout time: [34.088 µs 34.114 µs 34.139 µs] thrpt: [78.860 MiB/s 78.919 MiB/s 78.979 MiB/s] change: time: [-37.865% -37.723% -37.585%] (p = 0.00 < 0.05) thrpt: [+60.217% +60.573% +60.941%] Performance has improved. ```
This commit is contained in:
parent
28c7ef753a
commit
7b99783f00
@ -14,6 +14,7 @@ edition = "2021"
|
||||
rust-version = "1.71"
|
||||
|
||||
[dependencies]
|
||||
memchr = "2"
|
||||
nom = { version = "7", default-features = false, features = ["alloc"] }
|
||||
|
||||
[dev-dependencies]
|
||||
|
@ -20,6 +20,7 @@ use nom::{error_position, AsChar, InputTakeAtPosition};
|
||||
|
||||
pub mod expr;
|
||||
pub use expr::{Expr, Filter};
|
||||
mod memchr_splitter;
|
||||
pub mod node;
|
||||
pub use node::Node;
|
||||
|
||||
@ -362,22 +363,22 @@ fn ws<'a, O>(
|
||||
|
||||
/// Skips input until `end` was found, but does not consume it.
|
||||
/// Returns tuple that would be returned when parsing `end`.
|
||||
fn skip_till<'a, O>(
|
||||
fn skip_till<'a, 'b, O>(
|
||||
candidate_finder: impl crate::memchr_splitter::Splitter,
|
||||
end: impl FnMut(&'a str) -> ParseResult<'a, O>,
|
||||
) -> impl FnMut(&'a str) -> ParseResult<'a, (&'a str, O)> {
|
||||
enum Next<O> {
|
||||
IsEnd(O),
|
||||
NotEnd,
|
||||
}
|
||||
let mut next = alt((map(end, Next::IsEnd), map(anychar, |_| Next::NotEnd)));
|
||||
let mut next = alt((map(end, Some), map(anychar, |_| None)));
|
||||
move |start: &'a str| {
|
||||
let mut i = start;
|
||||
loop {
|
||||
let (j, is_end) = next(i)?;
|
||||
match is_end {
|
||||
Next::IsEnd(lookahead) => return Ok((i, (j, lookahead))),
|
||||
Next::NotEnd => i = j,
|
||||
}
|
||||
i = match candidate_finder.split(i) {
|
||||
Some((_, j)) => j,
|
||||
None => return Err(nom::Err::Error(ErrorContext::new("`end` not found`", i))),
|
||||
};
|
||||
i = match next(i)? {
|
||||
(j, Some(lookahead)) => return Ok((i, (j, lookahead))),
|
||||
(j, None) => j,
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
101
rinja_parser/src/memchr_splitter.rs
Normal file
101
rinja_parser/src/memchr_splitter.rs
Normal file
@ -0,0 +1,101 @@
|
||||
/// Finds candidate positions for a set of needles by looking only at each needle's first byte.
pub(crate) trait Splitter: Copy {
|
||||
/// Splits the haystack at the first position where the first byte of any needle occurs.
|
||||
///
|
||||
/// Since only the first byte of a needle is inspected, be aware that there can be
|
||||
/// false positives. Always check that the second string of the returned pair really
|
||||
/// starts with the expected needle. Returns `None` if no candidate byte was found.
|
||||
fn split<'a>(&self, haystack: &'a str) -> Option<(&'a str, &'a str)>;
|
||||
}
|
||||
|
||||
// Forwarding impl: a shared reference to a splitter is itself a splitter and simply
// delegates to the referenced value's `split`.
impl<T: Splitter + ?Sized> Splitter for &T {
|
||||
#[inline]
|
||||
fn split<'a>(&self, haystack: &'a str) -> Option<(&'a str, &'a str)> {
|
||||
T::split(self, haystack)
|
||||
}
|
||||
}
|
||||
|
||||
// Define and implement a string splitter using memchr.
//
// For the given names the macro generates:
// * a `Copy` struct `$struct` with one `u8` field per needle (the needle's first byte),
// * an inherent `new()` / `split()` pair plus the `Splitter` trait impl for the struct,
// * the unsafe free function `$split_unchecked` that performs the search via `memchr::$memchr`.
|
||||
macro_rules! new_memchr_type {
|
||||
($struct:ident $split_unchecked:ident $memchr:ident $($field:ident)*) => {
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub(crate) struct $struct {
|
||||
$($field: u8,)*
|
||||
}
|
||||
|
||||
impl $struct {
|
||||
/// Remembers the first byte of every needle.
///
/// # Panics
///
/// Panics if any needle is the empty string, because its first byte is indexed
/// unconditionally.
#[track_caller]
|
||||
pub(crate) fn new($($field: &str),*) -> Self {
|
||||
Self {
|
||||
$($field: $field.as_bytes()[0],)*
|
||||
}
|
||||
}
|
||||
|
||||
/// Splits `haystack` in front of the first occurrence of any stored needle byte.
#[inline]
|
||||
pub(crate) fn split<'a>(&self, haystack: &'a str) -> Option<(&'a str, &'a str)> {
|
||||
// SAFETY: During the construction of `self` we used strings as inputs, and the
|
||||
// first byte of a string is always at a char boundary (it is never a UTF-8
// continuation byte), so any occurrence of it in `haystack` is at a char boundary too.
|
||||
unsafe { $split_unchecked($(self.$field,)* haystack) }
|
||||
}
|
||||
}
|
||||
|
||||
impl Splitter for $struct {
|
||||
#[inline]
|
||||
fn split<'a>(&self, haystack: &'a str) -> Option<(&'a str, &'a str)> {
|
||||
// Resolves to the inherent `split()` above; inherent methods take precedence.
self.split(haystack)
|
||||
}
|
||||
}
|
||||
|
||||
/// Splits `haystack` in front of the first occurrence of any of the needle bytes.
///
/// # Safety
///
/// The caller has to ensure that every needle byte can only be found at a char boundary.
|
||||
pub(crate) unsafe fn $split_unchecked(
|
||||
$($field: u8,)*
|
||||
haystack: &str,
|
||||
) -> Option<(&str, &str)> {
|
||||
// `?` returns `None` early if none of the needle bytes occurs in the haystack.
let idx = memchr::$memchr($($field,)* haystack.as_bytes())?;
|
||||
// SAFETY: The caller ensures that the needles are at char boundary.
|
||||
// The found index is `< haystack.len()`, so both ranges are in bounds.
|
||||
Some((haystack.get_unchecked(..idx), haystack.get_unchecked(idx..)))
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// Splitters for one, two and three needles, backed by `memchr`, `memchr2` and `memchr3`.
new_memchr_type!(Splitter1 split1_unchecked memchr a);
|
||||
new_memchr_type!(Splitter2 split2_unchecked memchr2 a b);
|
||||
new_memchr_type!(Splitter3 split3_unchecked memchr3 a b c);
|
||||
|
||||
#[test]
|
||||
fn candidate_finder() {
|
||||
assert_eq!(
|
||||
Splitter1::new("test").split("abctefg"),
|
||||
Some(("abc", "tefg")),
|
||||
);
|
||||
assert_eq!(Splitter1::new("xyz").split("abctefg"), None);
|
||||
|
||||
assert_eq!(
|
||||
Splitter2::new("xyz", "foo").split("abctefg"),
|
||||
Some(("abcte", "fg")),
|
||||
);
|
||||
assert_eq!(Splitter2::new("oof", "xyz").split("abctefg"), None);
|
||||
|
||||
assert_eq!(
|
||||
Splitter3::new("oof", "apples", "xyz").split("abctefg"),
|
||||
Some(("", "abctefg")),
|
||||
);
|
||||
assert_eq!(
|
||||
Splitter3::new("oof", "peaches", "xyz").split("abctefg"),
|
||||
None
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
Splitter3::new("test", "test", "test").split("abctefg"),
|
||||
Some(("abc", "tefg")),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
Splitter3::new("🧚♀️Life", "😀Laugh", "😻Love")
|
||||
.split("sed diam nonumy eirmod tempor 🧚♀️Life ut labore et dolore magna aliquyam"),
|
||||
Some((
|
||||
"sed diam nonumy eirmod tempor ",
|
||||
"🧚♀️Life ut labore et dolore magna aliquyam"
|
||||
)),
|
||||
);
|
||||
}
|
@ -9,6 +9,7 @@ use nom::error_position;
|
||||
use nom::multi::{many0, many1, separated_list0};
|
||||
use nom::sequence::{delimited, pair, preceded, tuple};
|
||||
|
||||
use crate::memchr_splitter::{Splitter1, Splitter2, Splitter3};
|
||||
use crate::{
|
||||
filter, identifier, is_ws, keyword, not_ws, skip_till, str_lit, ws, ErrorContext, Expr, Filter,
|
||||
ParseResult, State, Target, WithSpan,
|
||||
@ -755,14 +756,20 @@ pub struct Lit<'a> {
|
||||
impl<'a> Lit<'a> {
|
||||
fn parse(i: &'a str, s: &State<'_>) -> ParseResult<'a, WithSpan<'a, Self>> {
|
||||
let start = i;
|
||||
let (i, _) = not(eof)(i)?;
|
||||
|
||||
let candidate_finder = Splitter3::new(
|
||||
s.syntax.block_start,
|
||||
s.syntax.comment_start,
|
||||
s.syntax.expr_start,
|
||||
);
|
||||
let p_start = alt((
|
||||
tag(s.syntax.block_start),
|
||||
tag(s.syntax.comment_start),
|
||||
tag(s.syntax.expr_start),
|
||||
));
|
||||
|
||||
let (i, _) = not(eof)(i)?;
|
||||
let (i, content) = opt(recognize(skip_till(p_start)))(i)?;
|
||||
let (i, content) = opt(recognize(skip_till(candidate_finder, p_start)))(i)?;
|
||||
let (i, content) = match content {
|
||||
Some("") => {
|
||||
// {block,comment,expr}_start follows immediately.
|
||||
@ -810,7 +817,7 @@ impl<'a> Raw<'a> {
|
||||
cut(tuple((
|
||||
opt(Whitespace::parse),
|
||||
|i| s.tag_block_end(i),
|
||||
consumed(skip_till(endraw)),
|
||||
consumed(skip_till(Splitter1::new(s.syntax.block_start), endraw)),
|
||||
))),
|
||||
));
|
||||
|
||||
@ -989,7 +996,8 @@ impl<'a> Comment<'a> {
|
||||
let mut depth = 0usize;
|
||||
loop {
|
||||
let start = i;
|
||||
let (_, tag) = opt(skip_till(|i| tag(i, s)))(i)?;
|
||||
let splitter = Splitter2::new(s.syntax.comment_start, s.syntax.comment_end);
|
||||
let (_, tag) = opt(skip_till(splitter, |i| tag(i, s)))(i)?;
|
||||
let Some((j, tag)) = tag else {
|
||||
return Err(
|
||||
ErrorContext::unclosed("comment", s.syntax.comment_end, start).into(),
|
||||
|
@ -371,10 +371,36 @@ fn change_delimiters_parse_filter() {
|
||||
expr_end: "=}",
|
||||
..Syntax::default()
|
||||
};
|
||||
|
||||
Ast::from_str("{= strvar|e =}", None, &syntax).unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unicode_delimiters_in_syntax() {
|
||||
let syntax = Syntax {
|
||||
expr_start: "🖎", // U+1F58E == b"\xf0\x9f\x96\x8e"
|
||||
expr_end: "✍", // U+270D = b'\xe2\x9c\x8d'
|
||||
..Syntax::default()
|
||||
};
|
||||
assert_eq!(
|
||||
Ast::from_str("Here comes the expression: 🖎 e ✍.", None, &syntax)
|
||||
.unwrap()
|
||||
.nodes(),
|
||||
[
|
||||
Node::Lit(WithSpan::no_span(Lit {
|
||||
lws: "",
|
||||
val: "Here comes the expression:",
|
||||
rws: " ",
|
||||
})),
|
||||
Node::Expr(Ws(None, None), WithSpan::no_span(Expr::Var("e")),),
|
||||
Node::Lit(WithSpan::no_span(Lit {
|
||||
lws: "",
|
||||
val: ".",
|
||||
rws: "",
|
||||
})),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_precedence() {
|
||||
let syntax = Syntax::default();
|
||||
|
Loading…
x
Reference in New Issue
Block a user