From fe8c88abec572ca4b02292d61d4c6bb32e2588ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Kijewski?= Date: Mon, 19 Aug 2024 00:56:33 +0200 Subject: [PATCH 1/7] derive: cannot write strings with prefix --- rinja_derive/src/generator.rs | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/rinja_derive/src/generator.rs b/rinja_derive/src/generator.rs index 000a134a..8e3d662f 100644 --- a/rinja_derive/src/generator.rs +++ b/rinja_derive/src/generator.rs @@ -1270,18 +1270,15 @@ impl<'a> Generator<'a> { } // for now, we only escape strings and chars at compile time - let (lit, escape_prefix) = match &**s { - Expr::StrLit(StrLit { prefix, content }) => { - (InputKind::StrLit(content), prefix.map(|p| p.to_char())) - } - Expr::CharLit(CharLit { prefix, content }) => ( - InputKind::CharLit(content), - if *prefix == Some(CharPrefix::Binary) { - Some('b') - } else { - None - }, - ), + let lit = match &**s { + Expr::StrLit(StrLit { + prefix: None, + content, + }) => InputKind::StrLit(content), + Expr::CharLit(CharLit { + prefix: None, + content, + }) => InputKind::CharLit(content), _ => return None, }; @@ -1323,9 +1320,6 @@ impl<'a> Generator<'a> { OutputKind::Text => unescaped, OutputKind::Html => { let mut escaped = String::with_capacity(unescaped.len() + 20); - if let Some(escape_prefix) = escape_prefix { - escaped.push(escape_prefix); - } write_escaped_str(&mut escaped, &unescaped).ok()?; match escaped == unescaped { true => unescaped, From 37bbf0b88f67d17e5cca86bc6e5b04e65e2bf2b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Kijewski?= Date: Mon, 19 Aug 2024 01:40:04 +0200 Subject: [PATCH 2/7] parser: parse floats like 123e456 --- rinja_parser/src/lib.rs | 41 ++++++++++++++------------------------- rinja_parser/src/tests.rs | 13 +++++++++++++ 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/rinja_parser/src/lib.rs b/rinja_parser/src/lib.rs index 8180e5b7..b3425738 100644 --- a/rinja_parser/src/lib.rs +++ b/rinja_parser/src/lib.rs @@ -340,20 +340,16 @@ fn bool_lit(i: &str) -> ParseResult<'_> { alt((keyword("false"), keyword("true")))(i) } -fn num_lit(i: &str) -> ParseResult<'_> { +fn num_lit<'a>(i: &'a str) -> ParseResult<'a> { fn suffix<'a>( start: &'a str, kind: &'a str, list: &'a [&str], - ignore: &'a [&str], - ) -> impl Fn(&'a str) -> ParseResult<'a> + Copy + 'a { + ) -> impl Fn(&'a str) -> ParseResult<'a, ()> + Copy + 'a { move |i| { let (i, suffix) = identifier(i)?; if list.contains(&suffix) { - Ok((i, suffix)) - } else if ignore.contains(&suffix) { - // no need for a message, this case only occures in an `opt(…)` - fail(i) + Ok((i, ())) } else { Err(nom::Err::Failure(ErrorContext::new( format!("unknown {kind} suffix `{suffix}`"), @@ -363,9 +359,16 @@ fn num_lit(i: &str) -> ParseResult<'_> { } } - let integer_suffix = suffix(i, "integer", INTEGER_TYPES, &[]); - let float_suffix = suffix(i, "float", FLOAT_TYPES, &["e"]); - let either_suffix = suffix(i, "number", NUM_TYPES, &["e"]); + let float = |start: &'a str| -> ParseResult<'a, ()> { + let (i, has_dot) = opt(tuple((char('.'), separated_digits(10, true))))(start)?; + let (i, has_exp) = opt(pair(one_of("eE"), opt(one_of("+-"))))(i)?; + if has_dot.is_none() && has_exp.is_none() { + return fail(start); + } + let (i, _) = cut(separated_digits(10, false))(i)?; + let (i, _) = opt(suffix(i, "float", FLOAT_TYPES))(i)?; + Ok((i, ())) + }; recognize(tuple(( opt(char('-')), @@ -377,25 +380,11 @@ fn num_lit(i: &str) -> ParseResult<'_> { recognize(tuple((char('o'), separated_digits(8, false)))), recognize(tuple((char('x'), separated_digits(16, false)))), )), - opt(integer_suffix), + opt(suffix(i, "integer", INTEGER_TYPES)), ))), recognize(tuple(( separated_digits(10, true), - opt(alt(( - either_suffix, - recognize(tuple(( - opt(tuple((char('.'), separated_digits(10, true)))), - one_of("eE"), - opt(one_of("+-")), - separated_digits(10, false), - opt(float_suffix), - ))), - recognize(tuple(( - char('.'), - separated_digits(10, true), - opt(float_suffix), - ))), - ))), + opt(alt((float, suffix(i, "number", NUM_TYPES)))), ))), )), )))(i) diff --git a/rinja_parser/src/tests.rs b/rinja_parser/src/tests.rs index 8d7143b5..26be23a4 100644 --- a/rinja_parser/src/tests.rs +++ b/rinja_parser/src/tests.rs @@ -1067,3 +1067,16 @@ fn fuzzed_filter_recursion() { const TEMPLATE: &str = include_str!("../tests/filter-recursion.txt"); assert!(Ast::from_str(TEMPLATE, None, &Syntax::default()).is_err()); } + +#[test] +fn four_thousand() { + assert_eq!( + Ast::from_str("{{4e3}}", None, &Syntax::default()) + .unwrap() + .nodes, + vec![Node::Expr( + Ws(None, None), + WithSpan::no_span(Expr::NumLit("4e3")), + )], + ); +} From ede4702919840b7d67aea1ae157406b2c4c9d35e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Kijewski?= Date: Mon, 19 Aug 2024 01:42:56 +0200 Subject: [PATCH 3/7] derive: compile time int, float, bool escaping test --- rinja_derive/src/tests.rs | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/rinja_derive/src/tests.rs b/rinja_derive/src/tests.rs index f698c7c8..6a56206c 100644 --- a/rinja_derive/src/tests.rs +++ b/rinja_derive/src/tests.rs @@ -580,4 +580,26 @@ A &[], 23, ); + + compare( + r#"{{ 1_2_3_4 }} {{ 4e3 }} {{ false }}"#, + r#"match ( + &((&&::rinja::filters::AutoEscaper::new(&(1_2_3_4), ::rinja::filters::Text)) + .rinja_auto_escape()?), + &((&&::rinja::filters::AutoEscaper::new(&(4e3), ::rinja::filters::Text)) + .rinja_auto_escape()?), + &((&&::rinja::filters::AutoEscaper::new(&(false), ::rinja::filters::Text)) + .rinja_auto_escape()?), + ) { + (expr0, expr2, expr4) => { + (&&::rinja::filters::Writable(expr0)).rinja_write(writer)?; + writer.write_str(" ")?; + (&&::rinja::filters::Writable(expr2)).rinja_write(writer)?; + writer.write_str(" ")?; + (&&::rinja::filters::Writable(expr4)).rinja_write(writer)?; + } + }"#, + &[], + 11, + ); } From fa003fcd0d7eca514407abe2ba7152d0ccb6d213 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Kijewski?= Date: Mon, 19 Aug 2024 04:13:16 +0200 Subject: [PATCH 4/7] parser: fix float parsing --- rinja_derive/src/generator.rs | 10 +- rinja_parser/src/expr.rs | 15 +-- rinja_parser/src/lib.rs | 217 +++++++++++++++++++++++++--------- rinja_parser/src/target.rs | 6 +- rinja_parser/src/tests.rs | 89 +++++++------- 5 files changed, 223 insertions(+), 114 deletions(-) diff --git a/rinja_derive/src/generator.rs b/rinja_derive/src/generator.rs index 8e3d662f..0697a882 100644 --- a/rinja_derive/src/generator.rs +++ b/rinja_derive/src/generator.rs @@ -399,7 +399,7 @@ impl<'a> Generator<'a> { let (expr, span) = expr.deconstruct(); match expr { - Expr::NumLit(_) + Expr::NumLit(_, _) | Expr::StrLit(_) | Expr::CharLit(_) | Expr::Var(_) @@ -1483,7 +1483,7 @@ impl<'a> Generator<'a> { ) -> Result { Ok(match **expr { Expr::BoolLit(s) => self.visit_bool_lit(buf, s), - Expr::NumLit(s) => self.visit_num_lit(buf, s), + Expr::NumLit(s, _) => self.visit_num_lit(buf, s), Expr::StrLit(ref s) => self.visit_str_lit(buf, s), Expr::CharLit(ref s) => self.visit_char_lit(buf, s), Expr::Var(s) => self.visit_var(buf, s), @@ -2200,7 +2200,7 @@ impl<'a> Generator<'a> { } self.visit_str_lit(buf, s); } - Target::NumLit(s) => { + Target::NumLit(s, _) => { if first_level { buf.write('&'); } @@ -2609,7 +2609,7 @@ fn is_copyable(expr: &Expr<'_>) -> bool { fn is_copyable_within_op(expr: &Expr<'_>, within_op: bool) -> bool { match expr { - Expr::BoolLit(_) | Expr::NumLit(_) | Expr::StrLit(_) | Expr::CharLit(_) => true, + Expr::BoolLit(_) | Expr::NumLit(_, _) | Expr::StrLit(_) | Expr::CharLit(_) => true, Expr::Unary(.., expr) => is_copyable_within_op(expr, true), Expr::BinOp(_, lhs, rhs) => { is_copyable_within_op(lhs, true) && is_copyable_within_op(rhs, true) @@ -2645,7 +2645,7 @@ pub(crate) fn is_cacheable(expr: &WithSpan<'_, Expr<'_>>) -> bool { match &**expr { // Literals are the definition of pure: Expr::BoolLit(_) => true, - Expr::NumLit(_) => true, + Expr::NumLit(_, _) => true, Expr::StrLit(_) => true, Expr::CharLit(_) => true, // fmt::Display should have no effects: diff --git a/rinja_parser/src/expr.rs b/rinja_parser/src/expr.rs index da917d7f..3646b5d3 100644 --- a/rinja_parser/src/expr.rs +++ b/rinja_parser/src/expr.rs @@ -3,8 +3,8 @@ use std::str; use nom::branch::alt; use nom::bytes::complete::{tag, take_till}; -use nom::character::complete::char; -use nom::combinator::{cut, fail, map, not, opt, peek, recognize, value}; +use nom::character::complete::{char, digit1}; +use nom::combinator::{consumed, cut, fail, map, not, opt, peek, recognize, value}; use nom::error::ErrorKind; use nom::error_position; use nom::multi::{fold_many0, many0, separated_list0}; @@ -12,7 +12,7 @@ use nom::sequence::{pair, preceded, terminated, tuple}; use crate::{ char_lit, filter, identifier, keyword, not_ws, num_lit, path_or_identifier, str_lit, ws, - CharLit, ErrorContext, Level, ParseResult, PathOrIdentifier, StrLit, WithSpan, + CharLit, ErrorContext, Level, Num, ParseResult, PathOrIdentifier, StrLit, WithSpan, }; macro_rules! expr_prec_layer { @@ -35,7 +35,7 @@ macro_rules! expr_prec_layer { #[derive(Clone, Debug, PartialEq)] pub enum Expr<'a> { BoolLit(bool), - NumLit(&'a str), + NumLit(&'a str, Num<'a>), StrLit(StrLit<'a>), CharLit(CharLit<'a>), Var(&'a str), @@ -359,7 +359,8 @@ impl<'a> Expr<'a> { fn num(i: &'a str) -> ParseResult<'a, WithSpan<'a, Self>> { let start = i; - map(num_lit, |i| WithSpan::new(Self::NumLit(i), start))(i) + let (i, (full, num)) = consumed(num_lit)(i)?; + Ok((i, WithSpan::new(Expr::NumLit(full, num), start))) } fn char(i: &'a str) -> ParseResult<'a, WithSpan<'a, Self>> { @@ -374,7 +375,7 @@ impl<'a> Expr<'a> { Self::BinOp("&&" | "||", left, right) => { left.contains_bool_lit_or_is_defined() || right.contains_bool_lit_or_is_defined() } - Self::NumLit(_) + Self::NumLit(_, _) | Self::StrLit(_) | Self::CharLit(_) | Self::Var(_) @@ -530,7 +531,7 @@ impl<'a> Suffix<'a> { map( preceded( ws(pair(char('.'), not(char('.')))), - cut(alt((num_lit, identifier))), + cut(alt((digit1, identifier))), ), Self::Attr, )(i) diff --git a/rinja_parser/src/lib.rs b/rinja_parser/src/lib.rs index b3425738..c31e7726 100644 --- a/rinja_parser/src/lib.rs +++ b/rinja_parser/src/lib.rs @@ -12,7 +12,7 @@ use std::{fmt, str}; use nom::branch::alt; use nom::bytes::complete::{escaped, is_not, tag, take_till, take_while_m_n}; use nom::character::complete::{anychar, char, one_of, satisfy}; -use nom::combinator::{complete, cut, eof, fail, map, not, opt, recognize}; +use nom::combinator::{complete, consumed, cut, eof, fail, map, not, opt, recognize, value}; use nom::error::{ErrorKind, FromExternalError}; use nom::multi::{many0_count, many1}; use nom::sequence::{delimited, pair, preceded, terminated, tuple}; @@ -340,54 +340,91 @@ fn bool_lit(i: &str) -> ParseResult<'_> { alt((keyword("false"), keyword("true")))(i) } -fn num_lit<'a>(i: &'a str) -> ParseResult<'a> { - fn suffix<'a>( - start: &'a str, +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum Num<'a> { + Int(&'a str, Option), + Float(&'a str, Option), +} + +fn num_lit<'a>(start: &'a str) -> ParseResult<'a, Num<'a>> { + fn num_lit_suffix<'a, T: Copy>( kind: &'a str, - list: &'a [&str], - ) -> impl Fn(&'a str) -> ParseResult<'a, ()> + Copy + 'a { - move |i| { - let (i, suffix) = identifier(i)?; - if list.contains(&suffix) { - Ok((i, ())) - } else { - Err(nom::Err::Failure(ErrorContext::new( - format!("unknown {kind} suffix `{suffix}`"), - start, - ))) - } + list: &[(&str, T)], + start: &'a str, + i: &'a str, + ) -> ParseResult<'a, T> { + let (i, suffix) = identifier(i)?; + if let Some(value) = list + .iter() + .copied() + .find_map(|(name, value)| (name == suffix).then_some(value)) + { + Ok((i, value)) + } else { + Err(nom::Err::Failure(ErrorContext::new( + format!("unknown {kind} suffix `{suffix}`"), + start, + ))) } } - let float = |start: &'a str| -> ParseResult<'a, ()> { - let (i, has_dot) = opt(tuple((char('.'), separated_digits(10, true))))(start)?; - let (i, has_exp) = opt(pair(one_of("eE"), opt(one_of("+-"))))(i)?; - if has_dot.is_none() && has_exp.is_none() { - return fail(start); + let int_with_base = pair(opt(char('-')), |i| { + let (i, (kind, base)) = consumed(preceded( + char('0'), + alt(( + value(2, char('b')), + value(8, char('o')), + value(16, char('x')), + )), + ))(i)?; + match opt(separated_digits(base, false))(i)? { + (i, Some(_)) => Ok((i, ())), + (_, None) => Err(nom::Err::Failure(ErrorContext::new( + format!("expected digits after `{kind}`"), + start, + ))), + } + }); + + let float = |i: &'a str| -> ParseResult<'a, ()> { + let (i, has_dot) = opt(pair(char('.'), separated_digits(10, true)))(i)?; + let (i, has_exp) = opt(|i| { + let (i, (kind, op)) = pair(one_of("eE"), opt(one_of("+-")))(i)?; + match opt(separated_digits(10, op.is_none()))(i)? { + (i, Some(_)) => Ok((i, ())), + (_, None) => Err(nom::Err::Failure(ErrorContext::new( + format!("expected decimal digits, `+` or `-` after exponent `{kind}`"), + start, + ))), + } + })(i)?; + match (has_dot, has_exp) { + (Some(_), _) | (_, Some(_)) => Ok((i, ())), + _ => fail(start), } - let (i, _) = cut(separated_digits(10, false))(i)?; - let (i, _) = opt(suffix(i, "float", FLOAT_TYPES))(i)?; - Ok((i, ())) }; - recognize(tuple(( - opt(char('-')), - alt(( - recognize(tuple(( - char('0'), - alt(( - recognize(tuple((char('b'), separated_digits(2, false)))), - recognize(tuple((char('o'), separated_digits(8, false)))), - recognize(tuple((char('x'), separated_digits(16, false)))), - )), - opt(suffix(i, "integer", INTEGER_TYPES)), - ))), - recognize(tuple(( - separated_digits(10, true), - opt(alt((float, suffix(i, "number", NUM_TYPES)))), - ))), - )), - )))(i) + let (i, num) = if let Ok((i, Some(num))) = opt(recognize(int_with_base))(start) { + let (i, suffix) = opt(|i| num_lit_suffix("integer", INTEGER_TYPES, start, i))(i)?; + (i, Num::Int(num, suffix)) + } else { + let (i, (num, float)) = consumed(preceded( + pair(opt(char('-')), separated_digits(10, true)), + opt(float), + ))(start)?; + if float.is_some() { + let (i, suffix) = opt(|i| num_lit_suffix("float", FLOAT_TYPES, start, i))(i)?; + (i, Num::Float(num, suffix)) + } else { + let (i, suffix) = opt(|i| num_lit_suffix("number", NUM_TYPES, start, i))(i)?; + match suffix { + Some(NumKind::Int(kind)) => (i, Num::Int(num, Some(kind))), + Some(NumKind::Float(kind)) => (i, Num::Float(num, Some(kind))), + None => (i, Num::Int(num, None)), + } + } + }; + Ok((i, num)) } /// Underscore separated digits of the given base, unless `start` is true this may start @@ -922,27 +959,75 @@ pub fn strip_common(base: &Path, path: &Path) -> String { } } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum IntKind { + I8, + I16, + I32, + I64, + I128, + Isize, + U8, + U16, + U32, + U64, + U128, + Usize, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum FloatKind { + F16, + F32, + F64, + F128, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum NumKind { + Int(IntKind), + Float(FloatKind), +} + /// Primitive integer types. Also used as number suffixes. -const INTEGER_TYPES: &[&str] = &[ - "i8", "i16", "i32", "i64", "i128", "isize", "u8", "u16", "u32", "u64", "u128", "usize", +const INTEGER_TYPES: &[(&str, IntKind)] = &[ + ("i8", IntKind::I8), + ("i16", IntKind::I16), + ("i32", IntKind::I32), + ("i64", IntKind::I64), + ("i128", IntKind::I128), + ("isize", IntKind::Isize), + ("u8", IntKind::U8), + ("u16", IntKind::U16), + ("u32", IntKind::U32), + ("u64", IntKind::U64), + ("u128", IntKind::U128), + ("usize", IntKind::Usize), ]; /// Primitive floating point types. Also used as number suffixes. -const FLOAT_TYPES: &[&str] = &["f16", "f32", "f64", "f128"]; +const FLOAT_TYPES: &[(&str, FloatKind)] = &[ + ("f16", FloatKind::F16), + ("f32", FloatKind::F32), + ("f64", FloatKind::F64), + ("f128", FloatKind::F128), +]; /// Primitive numeric types. Also used as number suffixes. -const NUM_TYPES: &[&str] = &{ - let mut list = [""; INTEGER_TYPES.len() + FLOAT_TYPES.len()]; +const NUM_TYPES: &[(&str, NumKind)] = &{ + let mut list = [("", NumKind::Int(IntKind::I8)); INTEGER_TYPES.len() + FLOAT_TYPES.len()]; let mut i = 0; let mut o = 0; while i < INTEGER_TYPES.len() { - list[o] = INTEGER_TYPES[i]; + let (name, value) = INTEGER_TYPES[i]; + list[o] = (name, NumKind::Int(value)); i += 1; o += 1; } let mut i = 0; while i < FLOAT_TYPES.len() { - list[o] = FLOAT_TYPES[i]; + let (name, value) = FLOAT_TYPES[i]; + list[o] = (name, NumKind::Float(value)); i += 1; o += 1; } @@ -955,7 +1040,7 @@ const PRIMITIVE_TYPES: &[&str] = &{ let mut i = 0; let mut o = 0; while i < NUM_TYPES.len() { - list[o] = NUM_TYPES[i]; + list[o] = NUM_TYPES[i].0; i += 1; o += 1; } @@ -968,7 +1053,7 @@ const PRIMITIVE_TYPES: &[&str] = &{ mod test { use std::path::Path; - use super::{char_lit, num_lit, str_lit, strip_common, StrLit, StrPrefix}; + use super::*; #[test] fn test_strip_common() { @@ -1001,14 +1086,36 @@ mod test { // Should fail. assert!(num_lit(".").is_err()); // Should succeed. - assert_eq!(num_lit("1.2E-02").unwrap(), ("", "1.2E-02")); + assert_eq!( + num_lit("1.2E-02").unwrap(), + ("", Num::Float("1.2E-02", None)) + ); // Not supported because Rust wants a number before the `.`. assert!(num_lit(".1").is_err()); assert!(num_lit(".1E-02").is_err()); + // A `_` directly after the `.` denotes a field. + assert_eq!(num_lit("1._0").unwrap(), ("._0", Num::Int("1", None))); + assert_eq!(num_lit("1_.0").unwrap(), ("", Num::Float("1_.0", None))); // Not supported (voluntarily because of `1..` syntax). - assert_eq!(num_lit("1.").unwrap(), (".", "1")); - assert_eq!(num_lit("1_.").unwrap(), (".", "1_")); - assert_eq!(num_lit("1_2.").unwrap(), (".", "1_2")); + assert_eq!(num_lit("1.").unwrap(), (".", Num::Int("1", None))); + assert_eq!(num_lit("1_.").unwrap(), (".", Num::Int("1_", None))); + assert_eq!(num_lit("1_2.").unwrap(), (".", Num::Int("1_2", None))); + // Numbers with suffixes + assert_eq!( + num_lit("-1usize").unwrap(), + ("", Num::Int("-1", Some(IntKind::Usize))) + ); + assert_eq!( + num_lit("123_f32").unwrap(), + ("", Num::Float("123_", Some(FloatKind::F32))) + ); + assert_eq!( + num_lit("1_.2_e+_3_f64|into_isize").unwrap(), + ( + "|into_isize", + Num::Float("1_.2_e+_3_", Some(FloatKind::F64)) + ) + ); } #[test] diff --git a/rinja_parser/src/target.rs b/rinja_parser/src/target.rs index 0a02e00d..46372c2f 100644 --- a/rinja_parser/src/target.rs +++ b/rinja_parser/src/target.rs @@ -7,7 +7,7 @@ use nom::sequence::{pair, preceded, tuple}; use crate::{ bool_lit, char_lit, identifier, keyword, num_lit, path_or_identifier, str_lit, ws, CharLit, - ErrorContext, ParseErr, ParseResult, PathOrIdentifier, State, StrLit, WithSpan, + ErrorContext, Num, ParseErr, ParseResult, PathOrIdentifier, State, StrLit, WithSpan, }; #[derive(Clone, Debug, PartialEq)] @@ -16,7 +16,7 @@ pub enum Target<'a> { Tuple(Vec<&'a str>, Vec>), Array(Vec<&'a str>, Vec>), Struct(Vec<&'a str>, Vec<(&'a str, Target<'a>)>), - NumLit(&'a str), + NumLit(&'a str, Num<'a>), StrLit(StrLit<'a>), CharLit(CharLit<'a>), BoolLit(&'a str), @@ -118,7 +118,7 @@ impl<'a> Target<'a> { alt(( map(str_lit, Self::StrLit), map(char_lit, Self::CharLit), - map(num_lit, Self::NumLit), + map(consumed(num_lit), |(full, num)| Target::NumLit(full, num)), map(bool_lit, Self::BoolLit), ))(i) } diff --git a/rinja_parser/src/tests.rs b/rinja_parser/src/tests.rs index 26be23a4..9dd5b876 100644 --- a/rinja_parser/src/tests.rs +++ b/rinja_parser/src/tests.rs @@ -1,5 +1,5 @@ use crate::node::{Lit, Whitespace, Ws}; -use crate::{Ast, Expr, Filter, InnerSyntax, Node, StrLit, Syntax, WithSpan}; +use crate::{Ast, Expr, Filter, InnerSyntax, Node, Num, StrLit, Syntax, WithSpan}; impl WithSpan<'static, T> { fn no_span(inner: T) -> Self { @@ -29,6 +29,10 @@ fn test_invalid_block() { Ast::from_str("{% extend \"blah\" %}", None, &Syntax::default()).unwrap(); } +fn int_lit(i: &str) -> Expr<'_> { + Expr::NumLit(i, Num::Int(i, None)) +} + #[test] fn test_parse_filter() { let syntax = Syntax::default(); @@ -50,7 +54,7 @@ fn test_parse_filter() { Ws(None, None), WithSpan::no_span(Expr::Filter(Filter { name: "abs", - arguments: vec![WithSpan::no_span(Expr::NumLit("2"))] + arguments: vec![WithSpan::no_span(int_lit("2"))] })), )], ); @@ -62,7 +66,7 @@ fn test_parse_filter() { name: "abs", arguments: vec![WithSpan::no_span(Expr::Unary( "-", - WithSpan::no_span(Expr::NumLit("2")).into() + WithSpan::no_span(int_lit("2")).into() ))] })), )], @@ -78,8 +82,8 @@ fn test_parse_filter() { arguments: vec![WithSpan::no_span(Expr::Group( WithSpan::no_span(Expr::BinOp( "-", - WithSpan::no_span(Expr::NumLit("1")).into(), - WithSpan::no_span(Expr::NumLit("2")).into() + WithSpan::no_span(int_lit("1")).into(), + WithSpan::no_span(int_lit("2")).into() )) .into() ))], @@ -93,16 +97,13 @@ fn test_parse_numbers() { let syntax = Syntax::default(); assert_eq!( Ast::from_str("{{ 2 }}", None, &syntax).unwrap().nodes, - vec![Node::Expr( - Ws(None, None), - WithSpan::no_span(Expr::NumLit("2")) - )], + vec![Node::Expr(Ws(None, None), WithSpan::no_span(int_lit("2")))], ); assert_eq!( Ast::from_str("{{ 2.5 }}", None, &syntax).unwrap().nodes, vec![Node::Expr( Ws(None, None), - WithSpan::no_span(Expr::NumLit("2.5")) + WithSpan::no_span(Expr::NumLit("2.5", Num::Float("2.5", None))) )], ); } @@ -180,7 +181,7 @@ fn test_parse_path() { Ws(None, None), WithSpan::no_span(Expr::Call( Box::new(WithSpan::no_span(Expr::Path(vec!["Some"]))), - vec![WithSpan::no_span(Expr::NumLit("123"))] + vec![WithSpan::no_span(int_lit("123"))] )), )], ); @@ -191,7 +192,7 @@ fn test_parse_path() { Ws(None, None), WithSpan::no_span(Expr::Call( Box::new(WithSpan::no_span(Expr::Path(vec!["Ok"]))), - vec![WithSpan::no_span(Expr::NumLit("123"))] + vec![WithSpan::no_span(int_lit("123"))] )), )], ); @@ -201,7 +202,7 @@ fn test_parse_path() { Ws(None, None), WithSpan::no_span(Expr::Call( Box::new(WithSpan::no_span(Expr::Path(vec!["Err"]))), - vec![WithSpan::no_span(Expr::NumLit("123"))] + vec![WithSpan::no_span(int_lit("123"))] )), )], ); @@ -222,7 +223,7 @@ fn test_parse_var_call() { content: "123", prefix: None, })), - WithSpan::no_span(Expr::NumLit("3")) + WithSpan::no_span(int_lit("3")) ] )), )], @@ -248,7 +249,7 @@ fn test_parse_path_call() { Ws(None, None), WithSpan::no_span(Expr::Call( Box::new(WithSpan::no_span(Expr::Path(vec!["Option", "Some"]))), - vec![WithSpan::no_span(Expr::NumLit("123"))], + vec![WithSpan::no_span(int_lit("123"))], ),) )], ); @@ -266,7 +267,7 @@ fn test_parse_path_call() { content: "123", prefix: None, })), - WithSpan::no_span(Expr::NumLit("3")) + WithSpan::no_span(int_lit("3")) ], ),) )], @@ -791,35 +792,35 @@ fn test_parse_tuple() { Ast::from_str("{{ (1) }}", None, &syntax).unwrap().nodes, vec![Node::Expr( Ws(None, None), - WithSpan::no_span(Expr::Group(Box::new(WithSpan::no_span(Expr::NumLit("1"))),)) + WithSpan::no_span(Expr::Group(Box::new(WithSpan::no_span(int_lit("1"))),)) )], ); assert_eq!( Ast::from_str("{{ (1,) }}", None, &syntax).unwrap().nodes, vec![Node::Expr( Ws(None, None), - WithSpan::no_span(Expr::Tuple(vec![WithSpan::no_span(Expr::NumLit("1"))])), + WithSpan::no_span(Expr::Tuple(vec![WithSpan::no_span(int_lit("1"))])), )], ); assert_eq!( Ast::from_str("{{ (1, ) }}", None, &syntax).unwrap().nodes, vec![Node::Expr( Ws(None, None), - WithSpan::no_span(Expr::Tuple(vec![WithSpan::no_span(Expr::NumLit("1"))])), + WithSpan::no_span(Expr::Tuple(vec![WithSpan::no_span(int_lit("1"))])), )], ); assert_eq!( Ast::from_str("{{ (1 ,) }}", None, &syntax).unwrap().nodes, vec![Node::Expr( Ws(None, None), - WithSpan::no_span(Expr::Tuple(vec![WithSpan::no_span(Expr::NumLit("1"))])), + WithSpan::no_span(Expr::Tuple(vec![WithSpan::no_span(int_lit("1"))])), )], ); assert_eq!( Ast::from_str("{{ (1 , ) }}", None, &syntax).unwrap().nodes, vec![Node::Expr( Ws(None, None), - WithSpan::no_span(Expr::Tuple(vec![WithSpan::no_span(Expr::NumLit("1"))])), + WithSpan::no_span(Expr::Tuple(vec![WithSpan::no_span(int_lit("1"))])), )], ); assert_eq!( @@ -827,8 +828,8 @@ fn test_parse_tuple() { vec![Node::Expr( Ws(None, None), WithSpan::no_span(Expr::Tuple(vec![ - WithSpan::no_span(Expr::NumLit("1")), - WithSpan::no_span(Expr::NumLit("2")) + WithSpan::no_span(int_lit("1")), + WithSpan::no_span(int_lit("2")) ])), )], ); @@ -837,8 +838,8 @@ fn test_parse_tuple() { vec![Node::Expr( Ws(None, None), WithSpan::no_span(Expr::Tuple(vec![ - WithSpan::no_span(Expr::NumLit("1")), - WithSpan::no_span(Expr::NumLit("2")) + WithSpan::no_span(int_lit("1")), + WithSpan::no_span(int_lit("2")) ])), )], ); @@ -849,9 +850,9 @@ fn test_parse_tuple() { vec![Node::Expr( Ws(None, None), WithSpan::no_span(Expr::Tuple(vec![ - WithSpan::no_span(Expr::NumLit("1")), - WithSpan::no_span(Expr::NumLit("2")), - WithSpan::no_span(Expr::NumLit("3")) + WithSpan::no_span(int_lit("1")), + WithSpan::no_span(int_lit("2")), + WithSpan::no_span(int_lit("3")) ])), )], ); @@ -872,7 +873,7 @@ fn test_parse_tuple() { WithSpan::no_span(Expr::Filter(Filter { name: "abs", arguments: vec![WithSpan::no_span(Expr::Group(Box::new(WithSpan::no_span( - Expr::NumLit("1") + int_lit("1") ))))] })), )], @@ -886,7 +887,7 @@ fn test_parse_tuple() { WithSpan::no_span(Expr::Filter(Filter { name: "abs", arguments: vec![WithSpan::no_span(Expr::Tuple(vec![WithSpan::no_span( - Expr::NumLit("1") + int_lit("1") )]))] })), )], @@ -900,8 +901,8 @@ fn test_parse_tuple() { WithSpan::no_span(Expr::Filter(Filter { name: "abs", arguments: vec![WithSpan::no_span(Expr::Tuple(vec![ - WithSpan::no_span(Expr::NumLit("1")), - WithSpan::no_span(Expr::NumLit("2")) + WithSpan::no_span(int_lit("1")), + WithSpan::no_span(int_lit("2")) ]))] })), )], @@ -932,21 +933,21 @@ fn test_parse_array() { Ast::from_str("{{ [1] }}", None, &syntax).unwrap().nodes, vec![Node::Expr( Ws(None, None), - WithSpan::no_span(Expr::Array(vec![WithSpan::no_span(Expr::NumLit("1"))])) + WithSpan::no_span(Expr::Array(vec![WithSpan::no_span(int_lit("1"))])) )], ); assert_eq!( Ast::from_str("{{ [ 1] }}", None, &syntax).unwrap().nodes, vec![Node::Expr( Ws(None, None), - WithSpan::no_span(Expr::Array(vec![WithSpan::no_span(Expr::NumLit("1"))])) + WithSpan::no_span(Expr::Array(vec![WithSpan::no_span(int_lit("1"))])) )], ); assert_eq!( Ast::from_str("{{ [1 ] }}", None, &syntax).unwrap().nodes, vec![Node::Expr( Ws(None, None), - WithSpan::no_span(Expr::Array(vec![WithSpan::no_span(Expr::NumLit("1"))])) + WithSpan::no_span(Expr::Array(vec![WithSpan::no_span(int_lit("1"))])) )], ); assert_eq!( @@ -954,8 +955,8 @@ fn test_parse_array() { vec![Node::Expr( Ws(None, None), WithSpan::no_span(Expr::Array(vec![ - WithSpan::no_span(Expr::NumLit("1")), - WithSpan::no_span(Expr::NumLit("2")) + WithSpan::no_span(int_lit("1")), + WithSpan::no_span(int_lit("2")) ])) )], ); @@ -964,8 +965,8 @@ fn test_parse_array() { vec![Node::Expr( Ws(None, None), WithSpan::no_span(Expr::Array(vec![ - WithSpan::no_span(Expr::NumLit("1")), - WithSpan::no_span(Expr::NumLit("2")) + WithSpan::no_span(int_lit("1")), + WithSpan::no_span(int_lit("2")) ])) )], ); @@ -974,8 +975,8 @@ fn test_parse_array() { vec![Node::Expr( Ws(None, None), WithSpan::no_span(Expr::Array(vec![ - WithSpan::no_span(Expr::NumLit("1")), - WithSpan::no_span(Expr::NumLit("2")) + WithSpan::no_span(int_lit("1")), + WithSpan::no_span(int_lit("2")) ])) )], ); @@ -984,8 +985,8 @@ fn test_parse_array() { vec![Node::Expr( Ws(None, None), WithSpan::no_span(Expr::Array(vec![ - WithSpan::no_span(Expr::NumLit("1")), - WithSpan::no_span(Expr::NumLit("2")) + WithSpan::no_span(int_lit("1")), + WithSpan::no_span(int_lit("2")) ])) )], ); @@ -1076,7 +1077,7 @@ fn four_thousand() { .nodes, vec![Node::Expr( Ws(None, None), - WithSpan::no_span(Expr::NumLit("4e3")), + WithSpan::no_span(Expr::NumLit("4e3", Num::Float("4e3", None))), )], ); } From 2e342f2e47c9eedf8356ca200f6ea8a882112f53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Kijewski?= Date: Mon, 19 Aug 2024 05:26:16 +0200 Subject: [PATCH 5/7] derive: escape more literal types at compile time --- rinja_derive/src/generator.rs | 135 +++++++++++++++++++++++++--------- rinja_derive/src/tests.rs | 19 +---- 2 files changed, 103 insertions(+), 51 deletions(-) diff --git a/rinja_derive/src/generator.rs b/rinja_derive/src/generator.rs index 0697a882..478fee2a 100644 --- a/rinja_derive/src/generator.rs +++ b/rinja_derive/src/generator.rs @@ -9,7 +9,10 @@ use std::{cmp, hash, mem, str}; use parser::node::{ Call, Comment, Cond, CondTest, FilterBlock, If, Include, Let, Lit, Loop, Match, Whitespace, Ws, }; -use parser::{CharLit, CharPrefix, Expr, Filter, Node, StrLit, StrPrefix, Target, WithSpan}; +use parser::{ + CharLit, CharPrefix, Expr, Filter, FloatKind, IntKind, Node, Num, StrLit, StrPrefix, Target, + WithSpan, +}; use quote::quote; use rustc_hash::FxBuildHasher; @@ -1260,17 +1263,24 @@ impl<'a> Generator<'a> { // can be escaped at compile time. We use an IIFE to make the code more readable // (immediate returns, try expressions). let writable = (|| -> Option> { - enum InputKind<'a> { - StrLit(&'a str), - CharLit(&'a str), - } + // we only optimize for known escapers enum OutputKind { Html, Text, } + let output = match self.input.escaper.strip_prefix(CRATE)? { + "::filters::Html" => OutputKind::Html, + "::filters::Text" => OutputKind::Text, + _ => return None, + }; // for now, we only escape strings and chars at compile time - let lit = match &**s { + enum InputKind<'a> { + StrLit(&'a str), + CharLit(&'a str), + Rendered(Cow<'a, str>), + } + let lit = match **s { Expr::StrLit(StrLit { prefix: None, content, @@ -1279,40 +1289,97 @@ impl<'a> Generator<'a> { prefix: None, content, }) => InputKind::CharLit(content), - _ => return None, - }; + Expr::NumLit(_, value) => { + enum NumKind { + Int(Option), + Float(Option), + } - // we only optimize for known escapers - let output = match self.input.escaper.strip_prefix(CRATE)? { - "::filters::Html" => OutputKind::Html, - "::filters::Text" => OutputKind::Text, + let (orig_value, kind) = match value { + Num::Int(value, kind) => (value, NumKind::Int(kind)), + Num::Float(value, kind) => (value, NumKind::Float(kind)), + }; + let value = match orig_value.chars().any(|c| c == '_') { + true => Cow::Owned(orig_value.chars().filter(|&c| c != '_').collect()), + false => Cow::Borrowed(orig_value), + }; + + fn int( + from_str_radix: impl Fn(&str, u32) -> Result, + value: &str, + ) -> Option { + Some(from_str_radix(value, 10).ok()?.to_string()) + } + + let value = match kind { + NumKind::Int(Some(IntKind::I8)) => int(i8::from_str_radix, &value)?, + NumKind::Int(Some(IntKind::I16)) => int(i16::from_str_radix, &value)?, + NumKind::Int(Some(IntKind::I32)) => int(i32::from_str_radix, &value)?, + NumKind::Int(Some(IntKind::I64 | IntKind::Isize)) => { + int(i64::from_str_radix, &value)? + } + NumKind::Int(Some(IntKind::I128)) => int(i128::from_str_radix, &value)?, + NumKind::Int(Some(IntKind::U8)) => int(u8::from_str_radix, &value)?, + NumKind::Int(Some(IntKind::U16)) => int(u16::from_str_radix, &value)?, + NumKind::Int(Some(IntKind::U32)) => int(u32::from_str_radix, &value)?, + NumKind::Int(Some(IntKind::U64 | IntKind::Usize)) => { + int(u64::from_str_radix, &value)? + } + NumKind::Int(Some(IntKind::U128)) => int(u128::from_str_radix, &value)?, + NumKind::Int(None) => match value.starts_with('-') { + true => int(i128::from_str_radix, &value)?, + false => int(u128::from_str_radix, &value)?, + }, + NumKind::Float(Some(FloatKind::F32)) => { + value.parse::().ok()?.to_string() + } + NumKind::Float(Some(FloatKind::F64) | None) => { + value.parse::().ok()?.to_string() + } + // implement once `f16` and `f128` are available + NumKind::Float(Some(FloatKind::F16 | FloatKind::F128)) => return None, + }; + InputKind::Rendered(match value == orig_value { + true => Cow::Borrowed(orig_value), + false => Cow::Owned(value), + }) + } + Expr::BoolLit(true) => InputKind::Rendered(Cow::Borrowed("true")), + Expr::BoolLit(false) => InputKind::Rendered(Cow::Borrowed("false")), _ => return None, }; // the input could be string escaped if it contains any backslashes - let escaped = match lit { - InputKind::StrLit(s) => s, - InputKind::CharLit(s) => s, - }; - let unescaped = if escaped.find('\\').is_none() { - // if the literal does not contain any backslashes, then it does not need unescaping - Cow::Borrowed(escaped) - } else { - // convert the input into a TokenStream and extract the first token - Cow::Owned(match lit { - InputKind::StrLit(escaped) => { - let input = format!(r#""{escaped}""#); - let input = input.parse().ok()?; - let input = syn::parse2::(input).ok()?; - input.value() + let unescaped = match lit { + InputKind::StrLit(escaped) | InputKind::CharLit(escaped) => { + if escaped.find('\\').is_none() { + // if the literal does not contain any backslashes, then it does not need unescaping + Cow::Borrowed(escaped) + } else { + // convert the input into a TokenStream and extract the first token + Cow::Owned(match lit { + InputKind::StrLit(escaped) => { + let input = format!(r#""{escaped}""#); + let input = input.parse().ok()?; + let input = syn::parse2::(input).ok()?; + input.value() + } + InputKind::CharLit(escaped) => { + let input = format!(r#"'{escaped}'"#); + let input = input.parse().ok()?; + let input = syn::parse2::(input).ok()?; + input.value().to_string() + } + InputKind::Rendered(s) => { + unreachable!( + "rendered values are known not to contain characters that need \ + escaping: {s:?}", + ); + } + }) } - InputKind::CharLit(escaped) => { - let input = format!(r#"'{escaped}'"#); - let input = input.parse().ok()?; - let input = syn::parse2::(input).ok()?; - input.value().to_string() - } - }) + } + InputKind::Rendered(s) => s, }; // escape the un-string-escaped input using the selected escaper diff --git a/rinja_derive/src/tests.rs b/rinja_derive/src/tests.rs index 6a56206c..72f25ce3 100644 --- a/rinja_derive/src/tests.rs +++ b/rinja_derive/src/tests.rs @@ -583,23 +583,8 @@ A compare( r#"{{ 1_2_3_4 }} {{ 4e3 }} {{ false }}"#, - r#"match ( - &((&&::rinja::filters::AutoEscaper::new(&(1_2_3_4), ::rinja::filters::Text)) - .rinja_auto_escape()?), - &((&&::rinja::filters::AutoEscaper::new(&(4e3), ::rinja::filters::Text)) - .rinja_auto_escape()?), - &((&&::rinja::filters::AutoEscaper::new(&(false), ::rinja::filters::Text)) - .rinja_auto_escape()?), - ) { - (expr0, expr2, expr4) => { - (&&::rinja::filters::Writable(expr0)).rinja_write(writer)?; - writer.write_str(" ")?; - (&&::rinja::filters::Writable(expr2)).rinja_write(writer)?; - writer.write_str(" ")?; - (&&::rinja::filters::Writable(expr4)).rinja_write(writer)?; - } - }"#, + r#"writer.write_str("1234 4000 false")?;"#, &[], - 11, + 15, ); } From b2b66ef29a04e111435d1e7b451c5797d4aef32a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Kijewski?= Date: Mon, 19 Aug 2024 05:31:59 +0200 Subject: [PATCH 6/7] derive: refactor `compile_time_escape` --- rinja_derive/src/generator.rs | 261 ++++++++++++++++------------------ 1 file changed, 122 insertions(+), 139 deletions(-) diff --git a/rinja_derive/src/generator.rs b/rinja_derive/src/generator.rs index 478fee2a..a2f99ed8 100644 --- a/rinja_derive/src/generator.rs +++ b/rinja_derive/src/generator.rs @@ -1259,146 +1259,9 @@ impl<'a> Generator<'a> { } fn write_expr(&mut self, ws: Ws, s: &'a WithSpan<'a, Expr<'a>>) { - // In here, we inspect in the expression if it is a literal, and if it is, whether it - // can be escaped at compile time. We use an IIFE to make the code more readable - // (immediate returns, try expressions). - let writable = (|| -> Option> { - // we only optimize for known escapers - enum OutputKind { - Html, - Text, - } - let output = match self.input.escaper.strip_prefix(CRATE)? { - "::filters::Html" => OutputKind::Html, - "::filters::Text" => OutputKind::Text, - _ => return None, - }; - - // for now, we only escape strings and chars at compile time - enum InputKind<'a> { - StrLit(&'a str), - CharLit(&'a str), - Rendered(Cow<'a, str>), - } - let lit = match **s { - Expr::StrLit(StrLit { - prefix: None, - content, - }) => InputKind::StrLit(content), - Expr::CharLit(CharLit { - prefix: None, - content, - }) => InputKind::CharLit(content), - Expr::NumLit(_, value) => { - enum NumKind { - Int(Option), - Float(Option), - } - - let (orig_value, kind) = match value { - Num::Int(value, kind) => (value, NumKind::Int(kind)), - Num::Float(value, kind) => (value, NumKind::Float(kind)), - }; - let value = match orig_value.chars().any(|c| c == '_') { - true => Cow::Owned(orig_value.chars().filter(|&c| c != '_').collect()), - false => Cow::Borrowed(orig_value), - }; - - fn int( - from_str_radix: impl Fn(&str, u32) -> Result, - value: &str, - ) -> Option { - Some(from_str_radix(value, 10).ok()?.to_string()) - } - - let value = match kind { - NumKind::Int(Some(IntKind::I8)) => int(i8::from_str_radix, &value)?, - NumKind::Int(Some(IntKind::I16)) => int(i16::from_str_radix, &value)?, - NumKind::Int(Some(IntKind::I32)) => int(i32::from_str_radix, &value)?, - NumKind::Int(Some(IntKind::I64 | IntKind::Isize)) => { - int(i64::from_str_radix, &value)? - } - NumKind::Int(Some(IntKind::I128)) => int(i128::from_str_radix, &value)?, - NumKind::Int(Some(IntKind::U8)) => int(u8::from_str_radix, &value)?, - NumKind::Int(Some(IntKind::U16)) => int(u16::from_str_radix, &value)?, - NumKind::Int(Some(IntKind::U32)) => int(u32::from_str_radix, &value)?, - NumKind::Int(Some(IntKind::U64 | IntKind::Usize)) => { - int(u64::from_str_radix, &value)? - } - NumKind::Int(Some(IntKind::U128)) => int(u128::from_str_radix, &value)?, - NumKind::Int(None) => match value.starts_with('-') { - true => int(i128::from_str_radix, &value)?, - false => int(u128::from_str_radix, &value)?, - }, - NumKind::Float(Some(FloatKind::F32)) => { - value.parse::().ok()?.to_string() - } - NumKind::Float(Some(FloatKind::F64) | None) => { - value.parse::().ok()?.to_string() - } - // implement once `f16` and `f128` are available - NumKind::Float(Some(FloatKind::F16 | FloatKind::F128)) => return None, - }; - InputKind::Rendered(match value == orig_value { - true => Cow::Borrowed(orig_value), - false => Cow::Owned(value), - }) - } - Expr::BoolLit(true) => InputKind::Rendered(Cow::Borrowed("true")), - Expr::BoolLit(false) => InputKind::Rendered(Cow::Borrowed("false")), - _ => return None, - }; - - // the input could be string escaped if it contains any backslashes - let unescaped = match lit { - InputKind::StrLit(escaped) | InputKind::CharLit(escaped) => { - if escaped.find('\\').is_none() { - // if the literal does not contain any backslashes, then it does not need unescaping - Cow::Borrowed(escaped) - } else { - // convert the input into a TokenStream and extract the first token - Cow::Owned(match lit { - InputKind::StrLit(escaped) => { - let input = format!(r#""{escaped}""#); - let input = input.parse().ok()?; - let input = syn::parse2::(input).ok()?; - input.value() - } - InputKind::CharLit(escaped) => { - let input = format!(r#"'{escaped}'"#); - let input = input.parse().ok()?; - let input = syn::parse2::(input).ok()?; - input.value().to_string() - } - InputKind::Rendered(s) => { - unreachable!( - "rendered values are known not to contain characters that need \ - escaping: {s:?}", - ); - } - }) - } - } - InputKind::Rendered(s) => s, - }; - - // escape the un-string-escaped input using the selected escaper - Some(Writable::Lit(match output { - OutputKind::Text => unescaped, - OutputKind::Html => { - let mut escaped = String::with_capacity(unescaped.len() + 20); - write_escaped_str(&mut escaped, &unescaped).ok()?; - match escaped == unescaped { - true => unescaped, - false => Cow::Owned(escaped), - } - } - })) - })() - .unwrap_or(Writable::Expr(s)); - self.handle_ws(ws); - self.buf_writable.push(writable); + self.buf_writable + .push(compile_time_escape(s, self.input.escaper).unwrap_or(Writable::Expr(s))); } // Write expression buffer and empty @@ -2347,6 +2210,126 @@ impl<'a> Generator<'a> { } } +/// In here, we inspect in the expression if it is a literal, and if it is, whether it +/// can be escaped at compile time. +fn compile_time_escape<'a>(expr: &Expr<'a>, escaper: &str) -> Option> { + // we only optimize for known escapers + enum OutputKind { + Html, + Text, + } + + // we only optimize for known escapers + let output = match escaper.strip_prefix(CRATE)? { + "::filters::Html" => OutputKind::Html, + "::filters::Text" => OutputKind::Text, + _ => return None, + }; + + // for now, we only escape strings, chars, numbers, and bools at compile time + let value = match *expr { + Expr::StrLit(StrLit { + prefix: None, + content, + }) => { + if content.find('\\').is_none() { + // if the literal does not contain any backslashes, then it does not need unescaping + Cow::Borrowed(content) + } else { + // the input could be string escaped if it contains any backslashes + let input = format!(r#""{content}""#); + let input = input.parse().ok()?; + let input = syn::parse2::(input).ok()?; + Cow::Owned(input.value()) + } + } + Expr::CharLit(CharLit { + prefix: None, + content, + }) => { + if content.find('\\').is_none() { + // if the literal does not contain any backslashes, then it does not need unescaping + Cow::Borrowed(content) + } else { + // the input could be string escaped if it contains any backslashes + let input = format!(r#"'{content}'"#); + let input = input.parse().ok()?; + let input = syn::parse2::(input).ok()?; + Cow::Owned(input.value().to_string()) + } + } + Expr::NumLit(_, value) => { + enum NumKind { + Int(Option), + Float(Option), + } + + let (orig_value, kind) = match value { + Num::Int(value, kind) => (value, NumKind::Int(kind)), + Num::Float(value, kind) => (value, NumKind::Float(kind)), + }; + let value = match orig_value.chars().any(|c| c == '_') { + true => Cow::Owned(orig_value.chars().filter(|&c| c != '_').collect()), + false => Cow::Borrowed(orig_value), + }; + + fn int( + from_str_radix: impl Fn(&str, u32) -> Result, + value: &str, + ) -> Option { + Some(from_str_radix(value, 10).ok()?.to_string()) + } + + let value = match kind { + NumKind::Int(Some(IntKind::I8)) => int(i8::from_str_radix, &value)?, + NumKind::Int(Some(IntKind::I16)) => int(i16::from_str_radix, &value)?, + NumKind::Int(Some(IntKind::I32)) => int(i32::from_str_radix, &value)?, + NumKind::Int(Some(IntKind::I64 | IntKind::Isize)) => { + int(i64::from_str_radix, &value)? + } + NumKind::Int(Some(IntKind::I128)) => int(i128::from_str_radix, &value)?, + NumKind::Int(Some(IntKind::U8)) => int(u8::from_str_radix, &value)?, + NumKind::Int(Some(IntKind::U16)) => int(u16::from_str_radix, &value)?, + NumKind::Int(Some(IntKind::U32)) => int(u32::from_str_radix, &value)?, + NumKind::Int(Some(IntKind::U64 | IntKind::Usize)) => { + int(u64::from_str_radix, &value)? + } + NumKind::Int(Some(IntKind::U128)) => int(u128::from_str_radix, &value)?, + NumKind::Int(None) => match value.starts_with('-') { + true => int(i128::from_str_radix, &value)?, + false => int(u128::from_str_radix, &value)?, + }, + NumKind::Float(Some(FloatKind::F32)) => value.parse::().ok()?.to_string(), + NumKind::Float(Some(FloatKind::F64) | None) => { + value.parse::().ok()?.to_string() + } + // implement once `f16` and `f128` are available + NumKind::Float(Some(FloatKind::F16 | FloatKind::F128)) => return None, + }; + match value == orig_value { + true => Cow::Borrowed(orig_value), + false => Cow::Owned(value), + } + } + Expr::BoolLit(true) => Cow::Borrowed("true"), + Expr::BoolLit(false) => Cow::Borrowed("false"), + _ => return None, + }; + + // escape the un-string-escaped input using the selected escaper + Some(Writable::Lit(match output { + OutputKind::Text => value, + OutputKind::Html => { + let mut escaped = String::with_capacity(value.len() + 20); + write_escaped_str(&mut escaped, &value).ok()?; + match escaped == value { + true => value, + false => Cow::Owned(escaped), + } + } + })) +} + #[derive(Debug)] struct Buffer { // The buffer to generate the code into From d8f2341e46592a4073dad0c3427615e8e95cdb47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Kijewski?= Date: Mon, 19 Aug 2024 14:33:34 +0200 Subject: [PATCH 7/7] Minor fix ups --- rinja_derive/src/generator.rs | 32 +++++++++++++++++++++++++------- rinja_parser/src/lib.rs | 9 +++++++++ rinja_parser/src/tests.rs | 13 ------------- 3 files changed, 34 insertions(+), 20 deletions(-) diff --git a/rinja_derive/src/generator.rs b/rinja_derive/src/generator.rs index a2f99ed8..82554f45 100644 --- a/rinja_derive/src/generator.rs +++ b/rinja_derive/src/generator.rs @@ -2284,17 +2284,35 @@ fn compile_time_escape<'a>(expr: &Expr<'a>, escaper: &str) -> Option int(i8::from_str_radix, &value)?, NumKind::Int(Some(IntKind::I16)) => int(i16::from_str_radix, &value)?, NumKind::Int(Some(IntKind::I32)) => int(i32::from_str_radix, &value)?, - NumKind::Int(Some(IntKind::I64 | IntKind::Isize)) => { - int(i64::from_str_radix, &value)? - } + NumKind::Int(Some(IntKind::I64)) => int(i64::from_str_radix, &value)?, NumKind::Int(Some(IntKind::I128)) => int(i128::from_str_radix, &value)?, + NumKind::Int(Some(IntKind::Isize)) => { + if cfg!(target_pointer_width = "16") { + int(i16::from_str_radix, &value)? + } else if cfg!(target_pointer_width = "32") { + int(i32::from_str_radix, &value)? + } else if cfg!(target_pointer_width = "64") { + int(i64::from_str_radix, &value)? + } else { + unreachable!("unexpected `cfg!(target_pointer_width)`") + } + } NumKind::Int(Some(IntKind::U8)) => int(u8::from_str_radix, &value)?, NumKind::Int(Some(IntKind::U16)) => int(u16::from_str_radix, &value)?, NumKind::Int(Some(IntKind::U32)) => int(u32::from_str_radix, &value)?, - NumKind::Int(Some(IntKind::U64 | IntKind::Usize)) => { - int(u64::from_str_radix, &value)? - } + NumKind::Int(Some(IntKind::U64)) => int(u64::from_str_radix, &value)?, NumKind::Int(Some(IntKind::U128)) => int(u128::from_str_radix, &value)?, + NumKind::Int(Some(IntKind::Usize)) => { + if cfg!(target_pointer_width = "16") { + int(u16::from_str_radix, &value)? + } else if cfg!(target_pointer_width = "32") { + int(u32::from_str_radix, &value)? + } else if cfg!(target_pointer_width = "64") { + int(u64::from_str_radix, &value)? + } else { + unreachable!("unexpected `cfg!(target_pointer_width)`") + } + } NumKind::Int(None) => match value.starts_with('-') { true => int(i128::from_str_radix, &value)?, false => int(u128::from_str_radix, &value)?, @@ -2303,7 +2321,7 @@ fn compile_time_escape<'a>(expr: &Expr<'a>, escaper: &str) -> Option { value.parse::().ok()?.to_string() } - // implement once `f16` and `f128` are available + // FIXME: implement once `f16` and `f128` are available NumKind::Float(Some(FloatKind::F16 | FloatKind::F128)) => return None, }; match value == orig_value { diff --git a/rinja_parser/src/lib.rs b/rinja_parser/src/lib.rs index c31e7726..94df31a1 100644 --- a/rinja_parser/src/lib.rs +++ b/rinja_parser/src/lib.rs @@ -368,6 +368,7 @@ fn num_lit<'a>(start: &'a str) -> ParseResult<'a, Num<'a>> { } } + // Equivalent to . let int_with_base = pair(opt(char('-')), |i| { let (i, (kind, base)) = consumed(preceded( char('0'), @@ -386,6 +387,8 @@ fn num_lit<'a>(start: &'a str) -> ParseResult<'a, Num<'a>> { } }); + // Equivalent to : + // no `_` directly after the decimal point `.`, or between `e` and `+/-`. let float = |i: &'a str| -> ParseResult<'a, ()> { let (i, has_dot) = opt(pair(char('.'), separated_digits(10, true)))(i)?; let (i, has_exp) = opt(|i| { @@ -1090,6 +1093,8 @@ mod test { num_lit("1.2E-02").unwrap(), ("", Num::Float("1.2E-02", None)) ); + assert_eq!(num_lit("4e3").unwrap(), ("", Num::Float("4e3", None)),); + assert_eq!(num_lit("4e+_3").unwrap(), ("", Num::Float("4e+_3", None)),); // Not supported because Rust wants a number before the `.`. assert!(num_lit(".1").is_err()); assert!(num_lit(".1E-02").is_err()); @@ -1116,6 +1121,10 @@ mod test { Num::Float("1_.2_e+_3_", Some(FloatKind::F64)) ) ); + assert_eq!( + num_lit("4e3f128").unwrap(), + ("", Num::Float("4e3", Some(FloatKind::F128))), + ); } #[test] diff --git a/rinja_parser/src/tests.rs b/rinja_parser/src/tests.rs index 9dd5b876..6093d09b 100644 --- a/rinja_parser/src/tests.rs +++ b/rinja_parser/src/tests.rs @@ -1068,16 +1068,3 @@ fn fuzzed_filter_recursion() { const TEMPLATE: &str = include_str!("../tests/filter-recursion.txt"); assert!(Ast::from_str(TEMPLATE, None, &Syntax::default()).is_err()); } - -#[test] -fn four_thousand() { - assert_eq!( - Ast::from_str("{{4e3}}", None, &Syntax::default()) - .unwrap() - .nodes, - vec![Node::Expr( - Ws(None, None), - WithSpan::no_span(Expr::NumLit("4e3", Num::Float("4e3", None))), - )], - ); -}