parser: macro calls need proper tokens

With this PR, invalid tokens inside a macro call `macro_name!(..)` are
rejected. Otherwise, we might emit invalid code that cannot be parsed by
Rust.
This commit is contained in:
René Kijewski 2025-06-01 18:44:54 +02:00 committed by René Kijewski
parent 6010cd390f
commit ca50787f92
2 changed files with 130 additions and 50 deletions

View File

@ -1276,3 +1276,25 @@ fn test_macro_names_that_need_escaping() {
); );
} }
} }
// Derives a template whose source contains a macro call with invalid tokens and checks
// that the generated output (a) carries the "expected valid tokens in macro call" error
// message and (b) still parses as a `syn::File`.
#[test]
#[rustfmt::skip] // FIXME: rustfmt bug <https://github.com/rust-lang/rustfmt/issues/6565>
fn test_macro_calls_need_proper_tokens() -> Result<(), syn::Error> {
// Regression test for fuzzed error <https://github.com/askama-rs/askama/issues/459>.
// Macro calls can contain any valid tokens, but only valid tokens.
// Invalid tokens will be rejected by rust, so we must not emit them.
let input = quote! {
#[template(
ext = "",
source = "\u{c}awtraitaitA{{override\u{c}! \u{c} (\u{1f} \u{c}\u{c})\u{c}}}"
// ^^^^^^^^ ^^^^^^
// illegal identifier illegal token
)]
struct f {}
};
let output = crate::derive_template(input, import_askama);
// The parser's error message must surface in the generated output.
assert!(output.to_string().contains("expected valid tokens in macro call"));
// Whatever was generated must itself be parseable Rust; a parse failure fails the test via `?`.
let _: syn::File = syn::parse2(output)?;
Ok(())
}

View File

@ -7,7 +7,7 @@ use winnow::combinator::{
alt, cut_err, fail, not, opt, peek, preceded, repeat, separated, terminated, alt, cut_err, fail, not, opt, peek, preceded, repeat, separated, terminated,
}; };
use winnow::error::ParserError as _; use winnow::error::ParserError as _;
use winnow::stream::Stream as _; use winnow::token::one_of;
use crate::node::CondTest; use crate::node::CondTest;
use crate::{ use crate::{
@ -709,60 +709,118 @@ impl<'a> Suffix<'a> {
} }
fn r#macro(i: &mut &'a str) -> ParseResult<'a, Self> { fn r#macro(i: &mut &'a str) -> ParseResult<'a, Self> {
// NOTE(review): side-by-side diff residue. The left part of each line is the removed
// `nested_parenthesis` helper; the right part is the added `Token` / `Group` definitions.
// Added side: `Token` classifies one token of a macro call. Only group delimiters are told
// apart (`Open` / `Close`); every other token is `SomeOther`.
fn nested_parenthesis<'a>(input: &mut &'a str) -> ParseResult<'a, ()> { #[derive(Debug, Clone, Copy, PartialEq, Eq)]
let mut nested = 0; enum Token {
let mut last = 0; SomeOther,
let mut in_str = false; Open(Group),
let mut escaped = false; Close(Group),
}
// Added side: `Group` names the three delimiter kinds a `Token::Open` / `Token::Close` can carry.
for (i, c) in input.char_indices() { #[derive(Debug, Clone, Copy, PartialEq, Eq)]
if !(c == '(' || c == ')') || !in_str { enum Group {
match c { Paren, // `(`
'(' => nested += 1, Brace, // `{`
')' => { Bracket, // `[`
if nested == 0 { }
last = i;
// Added side: `Group::as_close_char` maps a group kind to its closing delimiter character.
break; impl Group {
} fn as_close_char(self) -> char {
nested -= 1; match self {
} Group::Paren => ')',
'"' => { Group::Brace => '}',
if in_str { Group::Bracket => ']',
if !escaped {
in_str = false;
}
} else {
in_str = true;
}
}
'\\' => {
escaped = !escaped;
}
_ => (),
}
} }
if escaped && c != '\\' {
escaped = false;
}
}
if nested == 0 {
let _ = input.next_slice(last);
Ok(())
} else {
fail.parse_next(input)
} }
} }
// NOTE(review): on the first eight lines the left part is the removed `preceded(...)`
// expression; the right part is the added `macro_arguments` function documented here.
//
// `macro_arguments` consumes the arguments of a macro call whose opening delimiter
// (`open_token`) has already been consumed. It keeps a stack of currently open groups and
// reads token after token until the group opened by `open_token` is closed again.
// On success it returns `Suffix::MacroCall` holding the input text between the opening
// delimiter and the token that closed it. It fails with a `Cut` error if something that is
// not a valid token is encountered, or if a closing delimiter does not match the innermost
// open group.
preceded( fn macro_arguments<'a>(i: &mut &'a str, open_token: Group) -> ParseResult<'a, Suffix<'a>> {
(ws('!'), '('), let start = *i;
// Stack of groups that are still open; the macro call's own delimiter is at the bottom.
cut_err(terminated( let mut open_list: Vec<Group> = vec![open_token];
nested_parenthesis.take().map(Self::MacroCall), loop {
// Remember the position in front of the next token so the final closing delimiter
// can be excluded from the returned slice.
')', let before = *i;
// `opt(token)` yields `None` instead of failing when no token matches; `with_taken`
// additionally hands back the consumed text for error reporting.
)), let (token, token_span) = ws(opt(token).with_taken()).parse_next(i)?;
) let Some(token) = token else {
.parse_next(i) return Err(winnow::error::ErrMode::Cut(ErrorContext::new(
"expected valid tokens in macro call",
token_span,
)));
};
// Only a closing delimiter falls through this match; everything else starts the next
// iteration.
let close_token = match token {
Token::SomeOther => continue,
Token::Open(group) => {
open_list.push(group);
continue;
}
Token::Close(close_token) => close_token,
};
// `open_list` is never empty here: it starts with one element and the function returns
// as soon as a `pop` empties it.
let open_token = open_list.pop().unwrap();
if open_token != close_token {
return Err(winnow::error::ErrMode::Cut(ErrorContext::new(
format!(
"expected `{}` but found `{}`",
open_token.as_close_char(),
close_token.as_close_char(),
),
token_span,
)));
} else if open_list.is_empty() {
// The outermost group was closed: return everything consumed before this closing token.
return Ok(Suffix::MacroCall(&start[..start.len() - before.len()]));
}
}
}
/// Parse a single token of a macro call's arguments and classify it.
///
/// Opening and closing delimiters are tried first and reported as [`Token::Open`] /
/// [`Token::Close`]. Anything else that is accepted is reported as [`Token::SomeOther`].
fn token<'a>(i: &mut &'a str) -> ParseResult<'a, Token> {
    // <https://doc.rust-lang.org/reference/tokens.html>
    // The parsed values of the non-delimiter tokens are not needed, only whether one matched.
    let anything_else = alt((
        // keywords + identifiers
        identifier.void(),
        // literals
        Expr::char.void(),
        Expr::str.void(),
        Expr::num.void(),
        // lifetimes
        ('\'', identifier, not(peek('\''))).void(),
        // punctuations
        punctuation,
    ))
    .value(Token::SomeOther);
    let opening = open.map(Token::Open);
    let closing = close.map(Token::Close);

    alt((opening, closing, anything_else)).parse_next(i)
}
/// Consume one punctuation token; the matched text itself is discarded.
fn punctuation<'a>(i: &mut &'a str) -> ParseResult<'a, ()> {
    // <https://doc.rust-lang.org/reference/tokens.html#punctuation>
    // Longer spellings are tried before shorter ones, so e.g. `..=` is taken as one token
    // instead of `..` followed by `=`.
    let three_chars = alt(("<<=", ">>=", "...", "..=")).void();
    let two_chars = alt((
        "&&", "||", "<<", ">>", "+=", "-=", "*=", "/=", "%=", "^=", "&=", "|=", "==", "!=",
        ">=", "<=", "..", "::", "->", "=>", "<-",
    ))
    .void();
    let one_char = one_of([
        '+', '-', '*', '/', '%', '^', '!', '&', '|', '=', '>', '<', '@', '_', '.', ',',
        ';', ':', '#', '$', '?', '~',
    ])
    .void();

    alt((three_chars, two_chars, one_char)).parse_next(i)
}
/// Recognize an opening delimiter (`(`, `{` or `[`) and report which [`Group`] it starts.
fn open<'a>(i: &mut &'a str) -> ParseResult<'a, Group> {
    let paren = '('.value(Group::Paren);
    let brace = '{'.value(Group::Brace);
    let bracket = '['.value(Group::Bracket);

    alt((paren, brace, bracket)).parse_next(i)
}
/// Recognize a closing delimiter (`)`, `}` or `]`) and report which [`Group`] it ends.
fn close<'a>(i: &mut &'a str) -> ParseResult<'a, Group> {
    let paren = ')'.value(Group::Paren);
    let brace = '}'.value(Group::Brace);
    let bracket = ']'.value(Group::Bracket);

    alt((paren, brace, bracket)).parse_next(i)
}
// A macro call suffix starts with `!` (wrapped in `ws`) followed by an opening delimiter.
// If that prefix does not match, the error is propagated unchanged by `?`.
let open_token = preceded(ws('!'), open).parse_next(i)?;
// Everything up to the matching closing delimiter is handled by `macro_arguments`.
(|i: &mut _| macro_arguments(i, open_token)).parse_next(i)
} }
fn attr(i: &mut &'a str, level: Level<'_>) -> ParseResult<'a, Self> { fn attr(i: &mut &'a str, level: Level<'_>) -> ParseResult<'a, Self> {