Parse paths and identifiers only once

In the old implementation each variable in an expression would be parsed
up to three times:

* Try to parse a path because it contains a leading double colon, or
  infix double colons.
* Try to parse it as a path again by scanning for an identifier that
  contains an upper case character.
* Fall back to scanning for any identifier.

This PR turns all three steps into one, without the need for
backtracking.
This commit is contained in:
René Kijewski 2023-07-14 12:46:57 +02:00 committed by René Kijewski
parent d38e2b4e26
commit 95ff27c087
3 changed files with 53 additions and 37 deletions

View File

@ -9,7 +9,10 @@ use nom::multi::{fold_many0, many0, separated_list0};
use nom::sequence::{pair, preceded, terminated, tuple};
use nom::{error_position, IResult};
use super::{bool_lit, char_lit, identifier, not_ws, num_lit, path, str_lit, ws};
use super::{
bool_lit, char_lit, identifier, not_ws, num_lit, path_or_identifier, str_lit, ws,
PathOrIdentifier,
};
macro_rules! expr_prec_layer {
( $name:ident, $inner:ident, $op:expr ) => {
@ -141,9 +144,8 @@ impl<'a> Expr<'a> {
Self::num,
Self::str,
Self::char,
Self::path,
Self::path_or_var,
Self::array,
Self::var,
Self::group,
))(i)
}
@ -186,13 +188,11 @@ impl<'a> Expr<'a> {
)(i)
}
fn path(i: &'a str) -> IResult<&'a str, Self> {
let (i, path) = path(i)?;
Ok((i, Self::Path(path)))
}
fn var(i: &'a str) -> IResult<&'a str, Self> {
map(identifier, Self::Var)(i)
/// Parses either a path or a plain variable with one call to `path_or_identifier`.
///
/// A `PathOrIdentifier::Path` result becomes `Self::Path`, and a
/// `PathOrIdentifier::Identifier` result becomes `Self::Var`.
fn path_or_var(i: &'a str) -> IResult<&'a str, Self> {
map(path_or_identifier, |v| match v {
PathOrIdentifier::Path(v) => Self::Path(v),
PathOrIdentifier::Identifier(v) => Self::Var(v),
})(i)
}
fn str(i: &'a str) -> IResult<&'a str, Self> {

View File

@ -8,10 +8,10 @@ use nom::branch::alt;
use nom::bytes::complete::{escaped, is_not, tag, take_till};
use nom::character::complete::char;
use nom::character::complete::{anychar, digit1};
use nom::combinator::{cut, eof, map, opt, recognize, value};
use nom::combinator::{cut, eof, map, opt, recognize};
use nom::error::ErrorKind;
use nom::multi::separated_list1;
use nom::sequence::{delimited, pair, terminated, tuple};
use nom::multi::many1;
use nom::sequence::{delimited, pair, preceded, terminated, tuple};
use nom::{error_position, AsChar, IResult, InputTakeAtPosition};
pub mod expr;
@ -216,31 +216,38 @@ fn char_lit(i: &str) -> IResult<&str, &str> {
Ok((i, s.unwrap_or_default()))
}
fn path(i: &str) -> IResult<&str, Vec<&str>> {
let root = opt(value("", ws(tag("::"))));
let tail = separated_list1(ws(tag("::")), identifier);
/// Result of `path_or_identifier`: the parsed name is classified as either a
/// path or a plain identifier.
enum PathOrIdentifier<'a> {
/// Path segments in source order; a leading `::` is recorded as an empty first segment.
Path(Vec<&'a str>),
/// A single identifier with no `::` separators and no uppercase characters.
Identifier(&'a str),
}
match tuple((root, identifier, ws(tag("::")), tail))(i) {
Ok((i, (root, start, _, rest))) => {
let mut path = Vec::new();
path.extend(root);
fn path_or_identifier(i: &str) -> IResult<&str, PathOrIdentifier<'_>> {
let root = ws(opt(tag("::")));
let tail = opt(many1(preceded(ws(tag("::")), identifier)));
let (i, (root, start, rest)) = tuple((root, identifier, tail))(i)?;
let rest = rest.as_deref().unwrap_or_default();
// The returned identifier can be assumed to be path if:
// - Contains both a lowercase and uppercase character, i.e. a type name like `None`
// - Doesn't contain any lowercase characters, i.e. it's a constant
// In short, if it contains any uppercase characters it's a path.
match (root, start, rest) {
(Some(_), start, tail) => {
let mut path = Vec::with_capacity(2 + tail.len());
path.push("");
path.push(start);
path.extend(rest);
Ok((i, path))
Ok((i, PathOrIdentifier::Path(path)))
}
Err(err) => {
if let Ok((i, name)) = identifier(i) {
// The returned identifier can be assumed to be path if:
// - Contains both a lowercase and uppercase character, i.e. a type name like `None`
// - Doesn't contain any lowercase characters, i.e. it's a constant
// In short, if it contains any uppercase characters it's a path.
if name.contains(char::is_uppercase) {
return Ok((i, vec![name]));
}
}
// If `identifier()` fails then just return the original error
Err(err)
(None, name, []) if !name.contains(char::is_uppercase) => {
Ok((i, PathOrIdentifier::Identifier(name)))
}
(None, start, tail) => {
let mut path = Vec::with_capacity(1 + tail.len());
path.push(start);
path.extend(rest);
Ok((i, PathOrIdentifier::Path(path)))
}
}
}

View File

@ -3,15 +3,17 @@ use std::str;
use nom::branch::alt;
use nom::bytes::complete::{tag, take_until};
use nom::character::complete::char;
use nom::combinator::{complete, consumed, cut, eof, map, not, opt, peek, recognize, value};
use nom::combinator::{
complete, consumed, cut, eof, map, map_res, not, opt, peek, recognize, value,
};
use nom::error::{Error, ErrorKind};
use nom::multi::{fold_many0, many0, many1, separated_list0, separated_list1};
use nom::sequence::{delimited, pair, preceded, terminated, tuple};
use nom::{error_position, IResult};
use super::{
bool_lit, char_lit, identifier, is_ws, keyword, num_lit, path, skip_till, str_lit, ws, Expr,
State,
bool_lit, char_lit, identifier, is_ws, keyword, num_lit, path_or_identifier, skip_till,
str_lit, ws, Expr, PathOrIdentifier, State,
};
#[derive(Debug, PartialEq)]
@ -161,6 +163,13 @@ impl<'a> Target<'a> {
return Ok((i, Self::Tuple(Vec::new(), targets)));
}
let path = |i| {
map_res(path_or_identifier, |v| match v {
PathOrIdentifier::Path(v) => Ok(v),
PathOrIdentifier::Identifier(v) => Err(v),
})(i)
};
// match structs
let (i, path) = opt(path)(i)?;
if let Some(path) = path {