Parse paths and identifiers only once

In the old implementation, each variable in an expression would be parsed
up to three times:

* Try to parse it as a path, in case it contains a leading double colon
  or infix double colons.
* Try to parse it as a path again by scanning for an identifier that
  contains an uppercase character.
* Fall back to scanning for any identifier.

This PR turns all three steps into one, without the need for
backtracking.
This commit is contained in:
René Kijewski 2023-07-14 12:46:57 +02:00 committed by René Kijewski
parent d38e2b4e26
commit 95ff27c087
3 changed files with 53 additions and 37 deletions

View File

@ -9,7 +9,10 @@ use nom::multi::{fold_many0, many0, separated_list0};
use nom::sequence::{pair, preceded, terminated, tuple}; use nom::sequence::{pair, preceded, terminated, tuple};
use nom::{error_position, IResult}; use nom::{error_position, IResult};
use super::{bool_lit, char_lit, identifier, not_ws, num_lit, path, str_lit, ws}; use super::{
bool_lit, char_lit, identifier, not_ws, num_lit, path_or_identifier, str_lit, ws,
PathOrIdentifier,
};
macro_rules! expr_prec_layer { macro_rules! expr_prec_layer {
( $name:ident, $inner:ident, $op:expr ) => { ( $name:ident, $inner:ident, $op:expr ) => {
@ -141,9 +144,8 @@ impl<'a> Expr<'a> {
Self::num, Self::num,
Self::str, Self::str,
Self::char, Self::char,
Self::path, Self::path_or_var,
Self::array, Self::array,
Self::var,
Self::group, Self::group,
))(i) ))(i)
} }
@ -186,13 +188,11 @@ impl<'a> Expr<'a> {
)(i) )(i)
} }
fn path(i: &'a str) -> IResult<&'a str, Self> { fn path_or_var(i: &'a str) -> IResult<&'a str, Self> {
let (i, path) = path(i)?; map(path_or_identifier, |v| match v {
Ok((i, Self::Path(path))) PathOrIdentifier::Path(v) => Self::Path(v),
} PathOrIdentifier::Identifier(v) => Self::Var(v),
})(i)
fn var(i: &'a str) -> IResult<&'a str, Self> {
map(identifier, Self::Var)(i)
} }
fn str(i: &'a str) -> IResult<&'a str, Self> { fn str(i: &'a str) -> IResult<&'a str, Self> {

View File

@ -8,10 +8,10 @@ use nom::branch::alt;
use nom::bytes::complete::{escaped, is_not, tag, take_till}; use nom::bytes::complete::{escaped, is_not, tag, take_till};
use nom::character::complete::char; use nom::character::complete::char;
use nom::character::complete::{anychar, digit1}; use nom::character::complete::{anychar, digit1};
use nom::combinator::{cut, eof, map, opt, recognize, value}; use nom::combinator::{cut, eof, map, opt, recognize};
use nom::error::ErrorKind; use nom::error::ErrorKind;
use nom::multi::separated_list1; use nom::multi::many1;
use nom::sequence::{delimited, pair, terminated, tuple}; use nom::sequence::{delimited, pair, preceded, terminated, tuple};
use nom::{error_position, AsChar, IResult, InputTakeAtPosition}; use nom::{error_position, AsChar, IResult, InputTakeAtPosition};
pub mod expr; pub mod expr;
@ -216,31 +216,38 @@ fn char_lit(i: &str) -> IResult<&str, &str> {
Ok((i, s.unwrap_or_default())) Ok((i, s.unwrap_or_default()))
} }
fn path(i: &str) -> IResult<&str, Vec<&str>> { enum PathOrIdentifier<'a> {
let root = opt(value("", ws(tag("::")))); Path(Vec<&'a str>),
let tail = separated_list1(ws(tag("::")), identifier); Identifier(&'a str),
}
fn path_or_identifier(i: &str) -> IResult<&str, PathOrIdentifier<'_>> {
let root = ws(opt(tag("::")));
let tail = opt(many1(preceded(ws(tag("::")), identifier)));
let (i, (root, start, rest)) = tuple((root, identifier, tail))(i)?;
let rest = rest.as_deref().unwrap_or_default();
match tuple((root, identifier, ws(tag("::")), tail))(i) {
Ok((i, (root, start, _, rest))) => {
let mut path = Vec::new();
path.extend(root);
path.push(start);
path.extend(rest);
Ok((i, path))
}
Err(err) => {
if let Ok((i, name)) = identifier(i) {
// The returned identifier can be assumed to be path if: // The returned identifier can be assumed to be path if:
// - Contains both a lowercase and uppercase character, i.e. a type name like `None` // - Contains both a lowercase and uppercase character, i.e. a type name like `None`
// - Doesn't contain any lowercase characters, i.e. it's a constant // - Doesn't contain any lowercase characters, i.e. it's a constant
// In short, if it contains any uppercase characters it's a path. // In short, if it contains any uppercase characters it's a path.
if name.contains(char::is_uppercase) { match (root, start, rest) {
return Ok((i, vec![name])); (Some(_), start, tail) => {
let mut path = Vec::with_capacity(2 + tail.len());
path.push("");
path.push(start);
path.extend(rest);
Ok((i, PathOrIdentifier::Path(path)))
} }
(None, name, []) if !name.contains(char::is_uppercase) => {
Ok((i, PathOrIdentifier::Identifier(name)))
} }
(None, start, tail) => {
// If `identifier()` fails then just return the original error let mut path = Vec::with_capacity(1 + tail.len());
Err(err) path.push(start);
path.extend(rest);
Ok((i, PathOrIdentifier::Path(path)))
} }
} }
} }

View File

@ -3,15 +3,17 @@ use std::str;
use nom::branch::alt; use nom::branch::alt;
use nom::bytes::complete::{tag, take_until}; use nom::bytes::complete::{tag, take_until};
use nom::character::complete::char; use nom::character::complete::char;
use nom::combinator::{complete, consumed, cut, eof, map, not, opt, peek, recognize, value}; use nom::combinator::{
complete, consumed, cut, eof, map, map_res, not, opt, peek, recognize, value,
};
use nom::error::{Error, ErrorKind}; use nom::error::{Error, ErrorKind};
use nom::multi::{fold_many0, many0, many1, separated_list0, separated_list1}; use nom::multi::{fold_many0, many0, many1, separated_list0, separated_list1};
use nom::sequence::{delimited, pair, preceded, terminated, tuple}; use nom::sequence::{delimited, pair, preceded, terminated, tuple};
use nom::{error_position, IResult}; use nom::{error_position, IResult};
use super::{ use super::{
bool_lit, char_lit, identifier, is_ws, keyword, num_lit, path, skip_till, str_lit, ws, Expr, bool_lit, char_lit, identifier, is_ws, keyword, num_lit, path_or_identifier, skip_till,
State, str_lit, ws, Expr, PathOrIdentifier, State,
}; };
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
@ -161,6 +163,13 @@ impl<'a> Target<'a> {
return Ok((i, Self::Tuple(Vec::new(), targets))); return Ok((i, Self::Tuple(Vec::new(), targets)));
} }
let path = |i| {
map_res(path_or_identifier, |v| match v {
PathOrIdentifier::Path(v) => Ok(v),
PathOrIdentifier::Identifier(v) => Err(v),
})(i)
};
// match structs // match structs
let (i, path) = opt(path)(i)?; let (i, path) = opt(path)(i)?;
if let Some(path) = path { if let Some(path) = path {