From 1ac0c3514468b44e3464598ba6ac916fa69718ec Mon Sep 17 00:00:00 2001 From: Ed Page Date: Thu, 7 Nov 2024 13:54:48 -0600 Subject: [PATCH] refactor(parser): Clean up prep for 0.5 (#226) * refactor(parser): Rely on Parser trait rather than FnMut * refactor(parser): Consistently use ParseResult * refactor(parser): Accept Parser rather than FnMut * refactor(parser): Chain the input through * refactor(parser): Accept stateful Parser rather than stateless FnMut * refactor(parser): Remove redundant closures --- rinja_parser/src/lib.rs | 129 +++++++++++++++++++++++-------------- rinja_parser/src/node.rs | 4 +- rinja_parser/src/target.rs | 31 ++++----- 3 files changed, 96 insertions(+), 68 deletions(-) diff --git a/rinja_parser/src/lib.rs b/rinja_parser/src/lib.rs index 8e6e633e..02de1275 100644 --- a/rinja_parser/src/lib.rs +++ b/rinja_parser/src/lib.rs @@ -474,7 +474,7 @@ fn str_lit_without_prefix(i: &str) -> ParseResult<'_> { Ok((i, s.unwrap_or_default())) } -fn str_lit(i: &str) -> Result<(&str, StrLit<'_>), ParseErr<'_>> { +fn str_lit(i: &str) -> ParseResult<'_, StrLit<'_>> { let (i, (prefix, content)) = (opt(alt(('b', 'c'))), str_lit_without_prefix).parse_next(i)?; let prefix = match prefix { Some('b') => Some(StrPrefix::Binary), @@ -497,7 +497,7 @@ pub struct CharLit<'a> { // Information about allowed character escapes is available at: // . 
-fn char_lit(i: &str) -> Result<(&str, CharLit<'_>), ParseErr<'_>> { +fn char_lit(i: &str) -> ParseResult<'_, CharLit<'_>> { let start = i; let (i, (b_prefix, s)) = ( opt('b'), @@ -653,15 +653,15 @@ impl<'a> State<'a> { } } - fn nest<'b, T, F: FnOnce(&'b str) -> ParseResult<'b, T>>( + fn nest<'b, T, F: Parser<&'b str, T, ErrorContext<'b>>>( &self, i: &'b str, - callback: F, + mut callback: F, ) -> ParseResult<'b, T> { let prev_level = self.level.get(); let (_, level) = prev_level.nest(i)?; self.level.set(level); - let ret = callback(i); + let ret = callback.parse_next(i); self.level.set(prev_level); ret } @@ -1049,42 +1049,63 @@ mod test { #[test] fn test_num_lit() { // Should fail. - assert!(num_lit(".").is_err()); + assert!(num_lit.parse_next(".").is_err()); // Should succeed. assert_eq!( - num_lit("1.2E-02").unwrap(), + num_lit.parse_next("1.2E-02").unwrap(), ("", Num::Float("1.2E-02", None)) ); - assert_eq!(num_lit("4e3").unwrap(), ("", Num::Float("4e3", None)),); - assert_eq!(num_lit("4e+_3").unwrap(), ("", Num::Float("4e+_3", None)),); + assert_eq!( + num_lit.parse_next("4e3").unwrap(), + ("", Num::Float("4e3", None)), + ); + assert_eq!( + num_lit.parse_next("4e+_3").unwrap(), + ("", Num::Float("4e+_3", None)), + ); // Not supported because Rust wants a number before the `.`. - assert!(num_lit(".1").is_err()); - assert!(num_lit(".1E-02").is_err()); + assert!(num_lit.parse_next(".1").is_err()); + assert!(num_lit.parse_next(".1E-02").is_err()); // A `_` directly after the `.` denotes a field. - assert_eq!(num_lit("1._0").unwrap(), ("._0", Num::Int("1", None))); - assert_eq!(num_lit("1_.0").unwrap(), ("", Num::Float("1_.0", None))); + assert_eq!( + num_lit.parse_next("1._0").unwrap(), + ("._0", Num::Int("1", None)) + ); + assert_eq!( + num_lit.parse_next("1_.0").unwrap(), + ("", Num::Float("1_.0", None)) + ); // Not supported (voluntarily because of `1..` syntax). 
- assert_eq!(num_lit("1.").unwrap(), (".", Num::Int("1", None))); - assert_eq!(num_lit("1_.").unwrap(), (".", Num::Int("1_", None))); - assert_eq!(num_lit("1_2.").unwrap(), (".", Num::Int("1_2", None))); + assert_eq!( + num_lit.parse_next("1.").unwrap(), + (".", Num::Int("1", None)) + ); + assert_eq!( + num_lit.parse_next("1_.").unwrap(), + (".", Num::Int("1_", None)) + ); + assert_eq!( + num_lit.parse_next("1_2.").unwrap(), + (".", Num::Int("1_2", None)) + ); // Numbers with suffixes assert_eq!( - num_lit("-1usize").unwrap(), + num_lit.parse_next("-1usize").unwrap(), ("", Num::Int("-1", Some(IntKind::Usize))) ); assert_eq!( - num_lit("123_f32").unwrap(), + num_lit.parse_next("123_f32").unwrap(), ("", Num::Float("123_", Some(FloatKind::F32))) ); assert_eq!( - num_lit("1_.2_e+_3_f64|into_isize").unwrap(), + num_lit.parse_next("1_.2_e+_3_f64|into_isize").unwrap(), ( "|into_isize", Num::Float("1_.2_e+_3_", Some(FloatKind::F64)) ) ); assert_eq!( - num_lit("4e3f128").unwrap(), + num_lit.parse_next("4e3f128").unwrap(), ("", Num::Float("4e3", Some(FloatKind::F128))), ); } @@ -1096,30 +1117,42 @@ mod test { content: s, }; - assert_eq!(char_lit("'a'").unwrap(), ("", lit("a"))); - assert_eq!(char_lit("'字'").unwrap(), ("", lit("字"))); + assert_eq!(char_lit.parse_next("'a'").unwrap(), ("", lit("a"))); + assert_eq!(char_lit.parse_next("'字'").unwrap(), ("", lit("字"))); // Escaped single characters. 
- assert_eq!(char_lit("'\\\"'").unwrap(), ("", lit("\\\""))); - assert_eq!(char_lit("'\\''").unwrap(), ("", lit("\\'"))); - assert_eq!(char_lit("'\\t'").unwrap(), ("", lit("\\t"))); - assert_eq!(char_lit("'\\n'").unwrap(), ("", lit("\\n"))); - assert_eq!(char_lit("'\\r'").unwrap(), ("", lit("\\r"))); - assert_eq!(char_lit("'\\0'").unwrap(), ("", lit("\\0"))); + assert_eq!(char_lit.parse_next("'\\\"'").unwrap(), ("", lit("\\\""))); + assert_eq!(char_lit.parse_next("'\\''").unwrap(), ("", lit("\\'"))); + assert_eq!(char_lit.parse_next("'\\t'").unwrap(), ("", lit("\\t"))); + assert_eq!(char_lit.parse_next("'\\n'").unwrap(), ("", lit("\\n"))); + assert_eq!(char_lit.parse_next("'\\r'").unwrap(), ("", lit("\\r"))); + assert_eq!(char_lit.parse_next("'\\0'").unwrap(), ("", lit("\\0"))); // Escaped ascii characters (up to `0x7F`). - assert_eq!(char_lit("'\\x12'").unwrap(), ("", lit("\\x12"))); - assert_eq!(char_lit("'\\x02'").unwrap(), ("", lit("\\x02"))); - assert_eq!(char_lit("'\\x6a'").unwrap(), ("", lit("\\x6a"))); - assert_eq!(char_lit("'\\x7F'").unwrap(), ("", lit("\\x7F"))); + assert_eq!(char_lit.parse_next("'\\x12'").unwrap(), ("", lit("\\x12"))); + assert_eq!(char_lit.parse_next("'\\x02'").unwrap(), ("", lit("\\x02"))); + assert_eq!(char_lit.parse_next("'\\x6a'").unwrap(), ("", lit("\\x6a"))); + assert_eq!(char_lit.parse_next("'\\x7F'").unwrap(), ("", lit("\\x7F"))); // Escaped unicode characters (up to `0x10FFFF`). 
- assert_eq!(char_lit("'\\u{A}'").unwrap(), ("", lit("\\u{A}"))); - assert_eq!(char_lit("'\\u{10}'").unwrap(), ("", lit("\\u{10}"))); - assert_eq!(char_lit("'\\u{aa}'").unwrap(), ("", lit("\\u{aa}"))); - assert_eq!(char_lit("'\\u{10FFFF}'").unwrap(), ("", lit("\\u{10FFFF}"))); + assert_eq!( + char_lit.parse_next("'\\u{A}'").unwrap(), + ("", lit("\\u{A}")) + ); + assert_eq!( + char_lit.parse_next("'\\u{10}'").unwrap(), + ("", lit("\\u{10}")) + ); + assert_eq!( + char_lit.parse_next("'\\u{aa}'").unwrap(), + ("", lit("\\u{aa}")) + ); + assert_eq!( + char_lit.parse_next("'\\u{10FFFF}'").unwrap(), + ("", lit("\\u{10FFFF}")) + ); // Check with `b` prefix. assert_eq!( - char_lit("b'a'").unwrap(), + char_lit.parse_next("b'a'").unwrap(), ("", crate::CharLit { prefix: Some(crate::CharPrefix::Binary), content: "a" @@ -1127,32 +1160,32 @@ mod test { ); // Should fail. - assert!(char_lit("''").is_err()); - assert!(char_lit("'\\o'").is_err()); - assert!(char_lit("'\\x'").is_err()); - assert!(char_lit("'\\x1'").is_err()); - assert!(char_lit("'\\x80'").is_err()); - assert!(char_lit("'\\u'").is_err()); - assert!(char_lit("'\\u{}'").is_err()); - assert!(char_lit("'\\u{110000}'").is_err()); + assert!(char_lit.parse_next("''").is_err()); + assert!(char_lit.parse_next("'\\o'").is_err()); + assert!(char_lit.parse_next("'\\x'").is_err()); + assert!(char_lit.parse_next("'\\x1'").is_err()); + assert!(char_lit.parse_next("'\\x80'").is_err()); + assert!(char_lit.parse_next("'\\u'").is_err()); + assert!(char_lit.parse_next("'\\u{}'").is_err()); + assert!(char_lit.parse_next("'\\u{110000}'").is_err()); } #[test] fn test_str_lit() { assert_eq!( - str_lit(r#"b"hello""#).unwrap(), + str_lit.parse_next(r#"b"hello""#).unwrap(), ("", StrLit { prefix: Some(StrPrefix::Binary), content: "hello" }) ); assert_eq!( - str_lit(r#"c"hello""#).unwrap(), + str_lit.parse_next(r#"c"hello""#).unwrap(), ("", StrLit { prefix: Some(StrPrefix::CLike), content: "hello" }) ); - assert!(str_lit(r#"d"hello""#).is_err()); 
+ assert!(str_lit.parse_next(r#"d"hello""#).is_err()); } } diff --git a/rinja_parser/src/node.rs b/rinja_parser/src/node.rs index 6eec10bd..c341da3a 100644 --- a/rinja_parser/src/node.rs +++ b/rinja_parser/src/node.rs @@ -85,7 +85,7 @@ impl<'a> Node<'a> { } let start = i; - let (j, tag) = preceded( + let (i, tag) = preceded( |i| s.tag_block_start(i), peek(preceded( (opt(Whitespace::parse), take_till0(not_ws)), @@ -112,7 +112,7 @@ impl<'a> Node<'a> { _ => return fail.parse_next(start), }; - let (i, node) = s.nest(j, |i| func(i, s))?; + let (i, node) = s.nest(i, |i| func(i, s))?; let (i, closed) = cut_node( None, diff --git a/rinja_parser/src/target.rs b/rinja_parser/src/target.rs index b8ddaa25..4eae338d 100644 --- a/rinja_parser/src/target.rs +++ b/rinja_parser/src/target.rs @@ -50,7 +50,7 @@ impl<'a> Target<'a> { // match tuples and unused parentheses let (i, target_is_tuple) = opt_opening_paren.parse_next(i)?; if target_is_tuple { - let (i, (singleton, mut targets)) = collect_targets(i, s, ')', Self::unnamed)?; + let (i, (singleton, mut targets)) = collect_targets(i, ')', |i| Self::unnamed(i, s))?; if singleton { return Ok((i, targets.pop().unwrap())); } @@ -61,7 +61,7 @@ impl<'a> Target<'a> { } let (i, target_is_array) = opt_opening_bracket.parse_next(i)?; if target_is_array { - let (i, (singleton, mut targets)) = collect_targets(i, s, ']', Self::unnamed)?; + let (i, (singleton, mut targets)) = collect_targets(i, ']', |i| Self::unnamed(i, s))?; if singleton { return Ok((i, targets.pop().unwrap())); } @@ -71,14 +71,10 @@ impl<'a> Target<'a> { )); } - let path = |i| { - path_or_identifier - .try_map(|v| match v { - PathOrIdentifier::Path(v) => Ok(v), - PathOrIdentifier::Identifier(v) => Err(v), - }) - .parse_next(i) - }; + let path = path_or_identifier.try_map(|v| match v { + PathOrIdentifier::Path(v) => Ok(v), + PathOrIdentifier::Identifier(v) => Err(v), + }); // match structs let (i, path) = opt(path).parse_next(i)?; @@ -88,7 +84,7 @@ impl<'a> Target<'a> { let 
(i, is_unnamed_struct) = opt_opening_paren.parse_next(i)?; if is_unnamed_struct { - let (i, (_, targets)) = collect_targets(i, s, ')', Self::unnamed)?; + let (i, (_, targets)) = collect_targets(i, ')', |i| Self::unnamed(i, s))?; return Ok(( i, Self::Tuple(path, only_one_rest_pattern(targets, false, "struct")?), @@ -97,7 +93,7 @@ impl<'a> Target<'a> { let (i, is_named_struct) = opt_opening_brace.parse_next(i)?; if is_named_struct { - let (i, (_, targets)) = collect_targets(i, s, '}', Self::named)?; + let (i, (_, targets)) = collect_targets(i, '}', |i| Self::named(i, s))?; return Ok((i, Self::Struct(path, targets))); } @@ -105,7 +101,7 @@ impl<'a> Target<'a> { } // neither literal nor struct nor path - let (new_i, name) = identifier(i)?; + let (new_i, name) = identifier.parse_next(i)?; let target = match name { "_" => Self::Placeholder(name), _ => verify_name(i, name)?, @@ -194,19 +190,18 @@ fn verify_name<'a>( fn collect_targets<'a, T>( i: &'a str, - s: &State<'_>, delim: char, - mut one: impl FnMut(&'a str, &State<'_>) -> ParseResult<'a, T>, + one: impl Parser<&'a str, T, ErrorContext<'a>>, ) -> ParseResult<'a, (bool, Vec<T>)> { - let opt_comma = |i| ws(opt(',')).map(|o| o.is_some()).parse_next(i); - let mut opt_end = |i| ws(opt(one_of(delim))).map(|o| o.is_some()).parse_next(i); + let opt_comma = ws(opt(',')).map(|o| o.is_some()); + let mut opt_end = ws(opt(one_of(delim))).map(|o| o.is_some()); let (i, has_end) = opt_end.parse_next(i)?; if has_end { return Ok((i, (false, Vec::new()))); } - let (i, targets) = opt(separated1(|i| one(i, s), ws(',')).map(|v: Vec<_>| v)).parse_next(i)?; + let (i, targets) = opt(separated1(one, ws(',')).map(|v: Vec<_>| v)).parse_next(i)?; let Some(targets) = targets else { return Err(winnow::error::ErrMode::Cut(ErrorContext::new( "expected comma separated list of members",