refactor(parser): Clean up prep for 0.5 (#226)

* refactor(parser): Rely on Parser trait rather than FnMut
* refactor(parser): Consistently use ParseResult
* refactor(parser): Accept Parser rather than FnMut
* refactor(parser): Chain the input through
* refactor(parser): Accept stateful Parser rather than stateless FnMut
* refactor(parser): Remove redundant closures
This commit is contained in:
Ed Page 2024-11-07 13:54:48 -06:00 committed by GitHub
parent 80811b5094
commit 1ac0c35144
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 96 additions and 68 deletions

View File

@ -474,7 +474,7 @@ fn str_lit_without_prefix(i: &str) -> ParseResult<'_> {
Ok((i, s.unwrap_or_default()))
}
fn str_lit(i: &str) -> Result<(&str, StrLit<'_>), ParseErr<'_>> {
fn str_lit(i: &str) -> ParseResult<'_, StrLit<'_>> {
let (i, (prefix, content)) = (opt(alt(('b', 'c'))), str_lit_without_prefix).parse_next(i)?;
let prefix = match prefix {
Some('b') => Some(StrPrefix::Binary),
@ -497,7 +497,7 @@ pub struct CharLit<'a> {
// Information about allowed character escapes is available at:
// <https://doc.rust-lang.org/reference/tokens.html#character-literals>.
fn char_lit(i: &str) -> Result<(&str, CharLit<'_>), ParseErr<'_>> {
fn char_lit(i: &str) -> ParseResult<'_, CharLit<'_>> {
let start = i;
let (i, (b_prefix, s)) = (
opt('b'),
@ -653,15 +653,15 @@ impl<'a> State<'a> {
}
}
fn nest<'b, T, F: FnOnce(&'b str) -> ParseResult<'b, T>>(
fn nest<'b, T, F: Parser<&'b str, T, ErrorContext<'b>>>(
&self,
i: &'b str,
callback: F,
mut callback: F,
) -> ParseResult<'b, T> {
let prev_level = self.level.get();
let (_, level) = prev_level.nest(i)?;
self.level.set(level);
let ret = callback(i);
let ret = callback.parse_next(i);
self.level.set(prev_level);
ret
}
@ -1049,42 +1049,63 @@ mod test {
#[test]
fn test_num_lit() {
// Should fail.
assert!(num_lit(".").is_err());
assert!(num_lit.parse_next(".").is_err());
// Should succeed.
assert_eq!(
num_lit("1.2E-02").unwrap(),
num_lit.parse_next("1.2E-02").unwrap(),
("", Num::Float("1.2E-02", None))
);
assert_eq!(num_lit("4e3").unwrap(), ("", Num::Float("4e3", None)),);
assert_eq!(num_lit("4e+_3").unwrap(), ("", Num::Float("4e+_3", None)),);
assert_eq!(
num_lit.parse_next("4e3").unwrap(),
("", Num::Float("4e3", None)),
);
assert_eq!(
num_lit.parse_next("4e+_3").unwrap(),
("", Num::Float("4e+_3", None)),
);
// Not supported because Rust wants a number before the `.`.
assert!(num_lit(".1").is_err());
assert!(num_lit(".1E-02").is_err());
assert!(num_lit.parse_next(".1").is_err());
assert!(num_lit.parse_next(".1E-02").is_err());
// A `_` directly after the `.` denotes a field.
assert_eq!(num_lit("1._0").unwrap(), ("._0", Num::Int("1", None)));
assert_eq!(num_lit("1_.0").unwrap(), ("", Num::Float("1_.0", None)));
assert_eq!(
num_lit.parse_next("1._0").unwrap(),
("._0", Num::Int("1", None))
);
assert_eq!(
num_lit.parse_next("1_.0").unwrap(),
("", Num::Float("1_.0", None))
);
// Not supported (voluntarily because of `1..` syntax).
assert_eq!(num_lit("1.").unwrap(), (".", Num::Int("1", None)));
assert_eq!(num_lit("1_.").unwrap(), (".", Num::Int("1_", None)));
assert_eq!(num_lit("1_2.").unwrap(), (".", Num::Int("1_2", None)));
assert_eq!(
num_lit.parse_next("1.").unwrap(),
(".", Num::Int("1", None))
);
assert_eq!(
num_lit.parse_next("1_.").unwrap(),
(".", Num::Int("1_", None))
);
assert_eq!(
num_lit.parse_next("1_2.").unwrap(),
(".", Num::Int("1_2", None))
);
// Numbers with suffixes
assert_eq!(
num_lit("-1usize").unwrap(),
num_lit.parse_next("-1usize").unwrap(),
("", Num::Int("-1", Some(IntKind::Usize)))
);
assert_eq!(
num_lit("123_f32").unwrap(),
num_lit.parse_next("123_f32").unwrap(),
("", Num::Float("123_", Some(FloatKind::F32)))
);
assert_eq!(
num_lit("1_.2_e+_3_f64|into_isize").unwrap(),
num_lit.parse_next("1_.2_e+_3_f64|into_isize").unwrap(),
(
"|into_isize",
Num::Float("1_.2_e+_3_", Some(FloatKind::F64))
)
);
assert_eq!(
num_lit("4e3f128").unwrap(),
num_lit.parse_next("4e3f128").unwrap(),
("", Num::Float("4e3", Some(FloatKind::F128))),
);
}
@ -1096,30 +1117,42 @@ mod test {
content: s,
};
assert_eq!(char_lit("'a'").unwrap(), ("", lit("a")));
assert_eq!(char_lit("'字'").unwrap(), ("", lit("字")));
assert_eq!(char_lit.parse_next("'a'").unwrap(), ("", lit("a")));
assert_eq!(char_lit.parse_next("'字'").unwrap(), ("", lit("字")));
// Escaped single characters.
assert_eq!(char_lit("'\\\"'").unwrap(), ("", lit("\\\"")));
assert_eq!(char_lit("'\\''").unwrap(), ("", lit("\\'")));
assert_eq!(char_lit("'\\t'").unwrap(), ("", lit("\\t")));
assert_eq!(char_lit("'\\n'").unwrap(), ("", lit("\\n")));
assert_eq!(char_lit("'\\r'").unwrap(), ("", lit("\\r")));
assert_eq!(char_lit("'\\0'").unwrap(), ("", lit("\\0")));
assert_eq!(char_lit.parse_next("'\\\"'").unwrap(), ("", lit("\\\"")));
assert_eq!(char_lit.parse_next("'\\''").unwrap(), ("", lit("\\'")));
assert_eq!(char_lit.parse_next("'\\t'").unwrap(), ("", lit("\\t")));
assert_eq!(char_lit.parse_next("'\\n'").unwrap(), ("", lit("\\n")));
assert_eq!(char_lit.parse_next("'\\r'").unwrap(), ("", lit("\\r")));
assert_eq!(char_lit.parse_next("'\\0'").unwrap(), ("", lit("\\0")));
// Escaped ascii characters (up to `0x7F`).
assert_eq!(char_lit("'\\x12'").unwrap(), ("", lit("\\x12")));
assert_eq!(char_lit("'\\x02'").unwrap(), ("", lit("\\x02")));
assert_eq!(char_lit("'\\x6a'").unwrap(), ("", lit("\\x6a")));
assert_eq!(char_lit("'\\x7F'").unwrap(), ("", lit("\\x7F")));
assert_eq!(char_lit.parse_next("'\\x12'").unwrap(), ("", lit("\\x12")));
assert_eq!(char_lit.parse_next("'\\x02'").unwrap(), ("", lit("\\x02")));
assert_eq!(char_lit.parse_next("'\\x6a'").unwrap(), ("", lit("\\x6a")));
assert_eq!(char_lit.parse_next("'\\x7F'").unwrap(), ("", lit("\\x7F")));
// Escaped unicode characters (up to `0x10FFFF`).
assert_eq!(char_lit("'\\u{A}'").unwrap(), ("", lit("\\u{A}")));
assert_eq!(char_lit("'\\u{10}'").unwrap(), ("", lit("\\u{10}")));
assert_eq!(char_lit("'\\u{aa}'").unwrap(), ("", lit("\\u{aa}")));
assert_eq!(char_lit("'\\u{10FFFF}'").unwrap(), ("", lit("\\u{10FFFF}")));
assert_eq!(
char_lit.parse_next("'\\u{A}'").unwrap(),
("", lit("\\u{A}"))
);
assert_eq!(
char_lit.parse_next("'\\u{10}'").unwrap(),
("", lit("\\u{10}"))
);
assert_eq!(
char_lit.parse_next("'\\u{aa}'").unwrap(),
("", lit("\\u{aa}"))
);
assert_eq!(
char_lit.parse_next("'\\u{10FFFF}'").unwrap(),
("", lit("\\u{10FFFF}"))
);
// Check with `b` prefix.
assert_eq!(
char_lit("b'a'").unwrap(),
char_lit.parse_next("b'a'").unwrap(),
("", crate::CharLit {
prefix: Some(crate::CharPrefix::Binary),
content: "a"
@ -1127,32 +1160,32 @@ mod test {
);
// Should fail.
assert!(char_lit("''").is_err());
assert!(char_lit("'\\o'").is_err());
assert!(char_lit("'\\x'").is_err());
assert!(char_lit("'\\x1'").is_err());
assert!(char_lit("'\\x80'").is_err());
assert!(char_lit("'\\u'").is_err());
assert!(char_lit("'\\u{}'").is_err());
assert!(char_lit("'\\u{110000}'").is_err());
assert!(char_lit.parse_next("''").is_err());
assert!(char_lit.parse_next("'\\o'").is_err());
assert!(char_lit.parse_next("'\\x'").is_err());
assert!(char_lit.parse_next("'\\x1'").is_err());
assert!(char_lit.parse_next("'\\x80'").is_err());
assert!(char_lit.parse_next("'\\u'").is_err());
assert!(char_lit.parse_next("'\\u{}'").is_err());
assert!(char_lit.parse_next("'\\u{110000}'").is_err());
}
#[test]
fn test_str_lit() {
assert_eq!(
str_lit(r#"b"hello""#).unwrap(),
str_lit.parse_next(r#"b"hello""#).unwrap(),
("", StrLit {
prefix: Some(StrPrefix::Binary),
content: "hello"
})
);
assert_eq!(
str_lit(r#"c"hello""#).unwrap(),
str_lit.parse_next(r#"c"hello""#).unwrap(),
("", StrLit {
prefix: Some(StrPrefix::CLike),
content: "hello"
})
);
assert!(str_lit(r#"d"hello""#).is_err());
assert!(str_lit.parse_next(r#"d"hello""#).is_err());
}
}

View File

@ -85,7 +85,7 @@ impl<'a> Node<'a> {
}
let start = i;
let (j, tag) = preceded(
let (i, tag) = preceded(
|i| s.tag_block_start(i),
peek(preceded(
(opt(Whitespace::parse), take_till0(not_ws)),
@ -112,7 +112,7 @@ impl<'a> Node<'a> {
_ => return fail.parse_next(start),
};
let (i, node) = s.nest(j, |i| func(i, s))?;
let (i, node) = s.nest(i, |i| func(i, s))?;
let (i, closed) = cut_node(
None,

View File

@ -50,7 +50,7 @@ impl<'a> Target<'a> {
// match tuples and unused parentheses
let (i, target_is_tuple) = opt_opening_paren.parse_next(i)?;
if target_is_tuple {
let (i, (singleton, mut targets)) = collect_targets(i, s, ')', Self::unnamed)?;
let (i, (singleton, mut targets)) = collect_targets(i, ')', |i| Self::unnamed(i, s))?;
if singleton {
return Ok((i, targets.pop().unwrap()));
}
@ -61,7 +61,7 @@ impl<'a> Target<'a> {
}
let (i, target_is_array) = opt_opening_bracket.parse_next(i)?;
if target_is_array {
let (i, (singleton, mut targets)) = collect_targets(i, s, ']', Self::unnamed)?;
let (i, (singleton, mut targets)) = collect_targets(i, ']', |i| Self::unnamed(i, s))?;
if singleton {
return Ok((i, targets.pop().unwrap()));
}
@ -71,14 +71,10 @@ impl<'a> Target<'a> {
));
}
let path = |i| {
path_or_identifier
.try_map(|v| match v {
PathOrIdentifier::Path(v) => Ok(v),
PathOrIdentifier::Identifier(v) => Err(v),
})
.parse_next(i)
};
let path = path_or_identifier.try_map(|v| match v {
PathOrIdentifier::Path(v) => Ok(v),
PathOrIdentifier::Identifier(v) => Err(v),
});
// match structs
let (i, path) = opt(path).parse_next(i)?;
@ -88,7 +84,7 @@ impl<'a> Target<'a> {
let (i, is_unnamed_struct) = opt_opening_paren.parse_next(i)?;
if is_unnamed_struct {
let (i, (_, targets)) = collect_targets(i, s, ')', Self::unnamed)?;
let (i, (_, targets)) = collect_targets(i, ')', |i| Self::unnamed(i, s))?;
return Ok((
i,
Self::Tuple(path, only_one_rest_pattern(targets, false, "struct")?),
@ -97,7 +93,7 @@ impl<'a> Target<'a> {
let (i, is_named_struct) = opt_opening_brace.parse_next(i)?;
if is_named_struct {
let (i, (_, targets)) = collect_targets(i, s, '}', Self::named)?;
let (i, (_, targets)) = collect_targets(i, '}', |i| Self::named(i, s))?;
return Ok((i, Self::Struct(path, targets)));
}
@ -105,7 +101,7 @@ impl<'a> Target<'a> {
}
// neither literal nor struct nor path
let (new_i, name) = identifier(i)?;
let (new_i, name) = identifier.parse_next(i)?;
let target = match name {
"_" => Self::Placeholder(name),
_ => verify_name(i, name)?,
@ -194,19 +190,18 @@ fn verify_name<'a>(
fn collect_targets<'a, T>(
i: &'a str,
s: &State<'_>,
delim: char,
mut one: impl FnMut(&'a str, &State<'_>) -> ParseResult<'a, T>,
one: impl Parser<&'a str, T, ErrorContext<'a>>,
) -> ParseResult<'a, (bool, Vec<T>)> {
let opt_comma = |i| ws(opt(',')).map(|o| o.is_some()).parse_next(i);
let mut opt_end = |i| ws(opt(one_of(delim))).map(|o| o.is_some()).parse_next(i);
let opt_comma = ws(opt(',')).map(|o| o.is_some());
let mut opt_end = ws(opt(one_of(delim))).map(|o| o.is_some());
let (i, has_end) = opt_end.parse_next(i)?;
if has_end {
return Ok((i, (false, Vec::new())));
}
let (i, targets) = opt(separated1(|i| one(i, s), ws(',')).map(|v: Vec<_>| v)).parse_next(i)?;
let (i, targets) = opt(separated1(one, ws(',')).map(|v: Vec<_>| v)).parse_next(i)?;
let Some(targets) = targets else {
return Err(winnow::error::ErrMode::Cut(ErrorContext::new(
"expected comma separated list of members",