mirror of
https://github.com/askama-rs/askama.git
synced 2025-09-27 13:00:57 +00:00
Use unicode-ident to determine what is an identifier
Resolves <https://github.com/askama-rs/askama/issues/442>.

The performance is actually slightly better than before. `unicode-ident` is highly optimized and jump-free.

One test from a fuzzer outcome had to be deleted, because it contained identifiers that weren't actually identifiers. There is still a test that tests the same problem, but every identifier is simply `x`. In another fuzzed test a character [`U+E0049`] was removed.

[`U+E0049`]: https://en.wikipedia.org/w/index.php?oldid=1278382889

<details>
<summary>Benchmark results</summary>

```text
$ cd askama_parser && cargo bench

librustdoc/all
    time:   [184.47 µs 185.30 µs 186.08 µs]
    thrpt:  [75.887 MiB/s 76.207 MiB/s 76.548 MiB/s]
  change:
    time:   [−1.4364% −0.9672% −0.4738%] (p = 0.00 < 0.05)
    thrpt:  [+0.4761% +0.9767% +1.4573%]
    Change within noise threshold.

librustdoc/item_info
    time:   [3.3880 µs 3.3892 µs 3.3906 µs]
    thrpt:  [46.409 MiB/s 46.428 MiB/s 46.445 MiB/s]
  change:
    time:   [−3.8735% −3.5927% −3.2949%] (p = 0.00 < 0.05)
    thrpt:  [+3.4071% +3.7266% +4.0296%]
    Performance has improved.

librustdoc/item_union
    time:   [20.052 µs 20.087 µs 20.126 µs]
    thrpt:  [49.044 MiB/s 49.140 MiB/s 49.224 MiB/s]
  change:
    time:   [−2.2419% −1.8647% −1.5113%] (p = 0.00 < 0.05)
    thrpt:  [+1.5345% +1.9002% +2.2933%]
    Performance has improved.

librustdoc/page
    time:   [85.828 µs 86.157 µs 86.518 µs]
    thrpt:  [71.571 MiB/s 71.871 MiB/s 72.147 MiB/s]
  change:
    time:   [−1.2728% −0.7668% −0.2512%] (p = 0.00 < 0.05)
    thrpt:  [+0.2518% +0.7727% +1.2892%]
    Change within noise threshold.

librustdoc/print_item
    time:   [10.065 µs 10.101 µs 10.138 µs]
    thrpt:  [93.132 MiB/s 93.469 MiB/s 93.806 MiB/s]
  change:
    time:   [−3.3793% −2.8352% −2.3267%] (p = 0.00 < 0.05)
    thrpt:  [+2.3821% +2.9180% +3.4975%]
    Performance has improved.

librustdoc/short_item_info
    time:   [9.0741 µs 9.1018 µs 9.1377 µs]
    thrpt:  [99.148 MiB/s 99.540 MiB/s 99.843 MiB/s]
  change:
    time:   [−4.7480% −4.2335% −3.7763%] (p = 0.00 < 0.05)
    thrpt:  [+3.9245% +4.4207% +4.9847%]
    Performance has improved.

librustdoc/sidebar
    time:   [21.468 µs 21.555 µs 21.648 µs]
    thrpt:  [57.004 MiB/s 57.252 MiB/s 57.482 MiB/s]
  change:
    time:   [−3.7641% −3.0465% −2.4191%] (p = 0.00 < 0.05)
    thrpt:  [+2.4791% +3.1423% +3.9114%]
    Performance has improved.

librustdoc/source
    time:   [7.9602 µs 7.9780 µs 7.9929 µs]
    thrpt:  [92.230 MiB/s 92.403 MiB/s 92.609 MiB/s]
  change:
    time:   [−1.6386% −1.0684% −0.5875%] (p = 0.00 < 0.05)
    thrpt:  [+0.5910% +1.0799% +1.6659%]
    Change within noise threshold.

librustdoc/type_layout_size
    time:   [4.7821 µs 4.7915 µs 4.8017 µs]
    thrpt:  [56.406 MiB/s 56.526 MiB/s 56.637 MiB/s]
  change:
    time:   [−1.9743% −1.4867% −1.0153%] (p = 0.00 < 0.05)
    thrpt:  [+1.0257% +1.5091% +2.0141%]
    Performance has improved.

librustdoc/type_layout
    time:   [15.022 µs 15.051 µs 15.076 µs]
    thrpt:  [178.57 MiB/s 178.88 MiB/s 179.22 MiB/s]
  change:
    time:   [−1.5028% −1.0358% −0.5705%] (p = 0.00 < 0.05)
    thrpt:  [+0.5738% +1.0466% +1.5257%]
    Change within noise threshold.
```

</details>
This commit is contained in:
parent
37101cb95d
commit
3775f4e3a3
@ -24,6 +24,7 @@ harness = false
|
||||
memchr = "2"
|
||||
serde = { version = "1.0", optional = true }
|
||||
serde_derive = { version = "1.0", optional = true }
|
||||
unicode-ident = "1.0.12"
|
||||
winnow = "0.7.0"
|
||||
|
||||
[dev-dependencies]
|
||||
|
@ -21,7 +21,7 @@ use std::{fmt, str};
|
||||
use winnow::ascii::take_escaped;
|
||||
use winnow::combinator::{alt, cut_err, delimited, fail, not, opt, peek, preceded, repeat};
|
||||
use winnow::error::FromExternalError;
|
||||
use winnow::stream::{AsChar, Stream as _};
|
||||
use winnow::stream::Stream as _;
|
||||
use winnow::token::{any, one_of, take_till, take_while};
|
||||
use winnow::{ModalParser, Parser};
|
||||
|
||||
@ -399,13 +399,9 @@ fn keyword(k: &str) -> impl ModalParser<&str, &str, ErrorContext<'_>> {
|
||||
}
|
||||
|
||||
// Parses a single identifier from the front of `input`.
//
// NOTE(review): this span is a rendered diff, so two bodies appear back to back.
// Going by the hunk header (13 lines before, 9 after) and the commit message, the
// `start` / `tail` / `opt(tail)` version is the code the commit removed, and the
// `head` / `tail` version is its replacement.
fn identifier<'i>(input: &mut &'i str) -> ParseResult<'i> {
|
||||
// Removed version: a leading run of one or more chars that are `is_alpha()`, `_`,
// or at/above U+0080 ...
let start = take_while(1.., |c: char| c.is_alpha() || c == '_' || c >= '\u{0080}');
|
||||
|
||||
// ... optionally followed by a run of chars that are `is_alphanum()`, `_`,
// or at/above U+0080.
let tail = take_while(1.., |c: char| {
|
||||
c.is_alphanum() || c == '_' || c >= '\u{0080}'
|
||||
});
|
||||
|
||||
// `.take()` makes the combined parser produce the consumed input rather than the tuple.
(start, opt(tail)).take().parse_next(input)
|
||||
// Replacement version: exactly one char that is `_` or passes
// `unicode_ident::is_xid_start` ...
let head = any.verify(|&c| c == '_' || unicode_ident::is_xid_start(c));
|
||||
// ... followed by zero or more (`..`) chars that pass `unicode_ident::is_xid_continue`.
let tail = take_while(.., unicode_ident::is_xid_continue);
|
||||
// Produce everything consumed by `head` and `tail` together.
(head, tail).take().parse_next(input)
|
||||
}
|
||||
|
||||
fn bool_lit<'i>(i: &mut &'i str) -> ParseResult<'i> {
|
||||
|
@ -1,3 +1,5 @@
|
||||
use winnow::Parser;
|
||||
|
||||
use crate::node::{Lit, Whitespace, Ws};
|
||||
use crate::{
|
||||
Ast, Expr, Filter, InnerSyntax, Node, Num, PathOrIdentifier, Span, StrLit, Syntax,
|
||||
@ -1216,13 +1218,6 @@ fn fuzzed_excessive_filter_block() {
|
||||
err.to_string().lines().next(),
|
||||
Some("your template code is too deeply nested, or the last expression is too complex"),
|
||||
);
|
||||
|
||||
let src = include!("../tests/fuzzed_excessive_filter_block.inc");
|
||||
let err = Ast::from_str(src, None, &Syntax::default()).unwrap_err();
|
||||
assert_eq!(
|
||||
err.to_string().lines().next(),
|
||||
Some("your template code is too deeply nested, or the last expression is too complex"),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -1412,3 +1407,11 @@ fn test_filter_with_path() {
|
||||
"failed to parse template source near offset 16",
|
||||
);
|
||||
}
|
||||
|
||||
// Checks that a lone `_` is accepted by `crate::identifier`: the parser must
// return "_" and leave no input behind.
#[test]
|
||||
fn underscore_is_an_identifier() {
|
||||
// `input` is mutable because `parse_next` takes `&mut input`; the final assertion
// expects it to be empty afterwards.
let mut input = "_";
|
||||
let result = crate::identifier.parse_next(&mut input);
|
||||
// The whole underscore is expected back as the parsed identifier ...
assert_eq!(result.unwrap(), "_");
|
||||
// ... and nothing may remain unparsed.
assert_eq!(input, "");
|
||||
}
|
||||
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
x
Reference in New Issue
Block a user