From dab685dd87ba99f6c0f005f2ce7b0a3c10dada22 Mon Sep 17 00:00:00 2001
From: Lukas Wirth
Date: Fri, 3 Feb 2023 11:47:33 +0100
Subject: [PATCH 1/8] De-magic number parser::Output encoding

---
 crates/parser/src/output.rs | 49 +++++++++++++++++++++++++++----------
 1 file changed, 36 insertions(+), 13 deletions(-)

diff --git a/crates/parser/src/output.rs b/crates/parser/src/output.rs
index 6ca841cfe0..3de6c0aba8 100644
--- a/crates/parser/src/output.rs
+++ b/crates/parser/src/output.rs
@@ -31,47 +31,70 @@ pub enum Step<'a> {
 }
 
 impl Output {
+    const EVENT_MASK: u32 = 0b1;
+    const TAG_MASK: u32 = 0x0000_00F0;
+    const N_INPUT_TOKEN_MASK: u32 = 0x0000_FF00;
+    const KIND_MASK: u32 = 0xFFFF_0000;
+
+    const ERROR_SHIFT: u32 = Self::EVENT_MASK.trailing_ones();
+    const TAG_SHIFT: u32 = Self::TAG_MASK.trailing_zeros();
+    const N_INPUT_TOKEN_SHIFT: u32 = Self::N_INPUT_TOKEN_MASK.trailing_zeros();
+    const KIND_SHIFT: u32 = Self::KIND_MASK.trailing_zeros();
+
+    const TOKEN_EVENT: u8 = 0;
+    const ENTER_EVENT: u8 = 1;
+    const EXIT_EVENT: u8 = 2;
+
     pub fn iter(&self) -> impl Iterator<Item = Step<'_>> {
         self.event.iter().map(|&event| {
-            if event & 0b1 == 0 {
-                return Step::Error { msg: self.error[(event as usize) >> 1].as_str() };
+            if event & Self::EVENT_MASK == 0 {
+                return Step::Error {
+                    msg: self.error[(event as usize) >> Self::ERROR_SHIFT].as_str(),
+                };
             }
-            let tag = ((event & 0x0000_00F0) >> 4) as u8;
+            let tag = ((event & Self::TAG_MASK) >> Self::TAG_SHIFT) as u8;
             match tag {
-                0 => {
-                    let kind: SyntaxKind = (((event & 0xFFFF_0000) >> 16) as u16).into();
-                    let n_input_tokens = ((event & 0x0000_FF00) >> 8) as u8;
+                Self::TOKEN_EVENT => {
+                    let kind: SyntaxKind =
+                        (((event & Self::KIND_MASK) >> Self::KIND_SHIFT) as u16).into();
+                    let n_input_tokens =
+                        ((event & Self::N_INPUT_TOKEN_MASK) >> Self::N_INPUT_TOKEN_SHIFT) as u8;
                     Step::Token { kind, n_input_tokens }
                 }
-                1 => {
-                    let kind: SyntaxKind = (((event & 0xFFFF_0000) >> 16) as u16).into();
+                Self::ENTER_EVENT => {
+                    let kind: SyntaxKind =
+                        (((event & Self::KIND_MASK) >> Self::KIND_SHIFT) as u16).into();
                     Step::Enter { kind }
                 }
-                2 => Step::Exit,
+                Self::EXIT_EVENT => Step::Exit,
                 _ => unreachable!(),
             }
         })
     }
 
     pub(crate) fn token(&mut self, kind: SyntaxKind, n_tokens: u8) {
-        let e = ((kind as u16 as u32) << 16) | ((n_tokens as u32) << 8) | 1;
+        let e = ((kind as u16 as u32) << Self::KIND_SHIFT)
+            | ((n_tokens as u32) << Self::N_INPUT_TOKEN_SHIFT)
+            | Self::EVENT_MASK;
         self.event.push(e)
     }
 
     pub(crate) fn enter_node(&mut self, kind: SyntaxKind) {
-        let e = ((kind as u16 as u32) << 16) | (1 << 4) | 1;
+        let e = ((kind as u16 as u32) << Self::KIND_SHIFT)
+            | ((Self::ENTER_EVENT as u32) << Self::TAG_SHIFT)
+            | Self::EVENT_MASK;
         self.event.push(e)
     }
 
     pub(crate) fn leave_node(&mut self) {
-        let e = 2 << 4 | 1;
+        let e = (Self::EXIT_EVENT as u32) << Self::TAG_SHIFT | Self::EVENT_MASK;
         self.event.push(e)
     }
 
     pub(crate) fn error(&mut self, error: String) {
         let idx = self.error.len();
         self.error.push(error);
-        let e = (idx as u32) << 1;
+        let e = (idx as u32) << Self::ERROR_SHIFT;
         self.event.push(e);
     }
 }

From 6fa6efe90fc8a79395cacb5c71315f0e2b32e623 Mon Sep 17 00:00:00 2001
From: Lukas Wirth
Date: Fri, 3 Feb 2023 17:18:48 +0100
Subject: [PATCH 2/8] fix: Fix parsing of nested tuple field accesses in a cursed way

---
 crates/parser/src/event.rs | 10 +-
 crates/parser/src/grammar/expressions.rs | 106 ++++++++++++------
 crates/parser/src/lib.rs | 2 +-
 crates/parser/src/output.rs | 12 ++
 crates/parser/src/parser.rs | 32 ++++++
 crates/parser/src/shortcuts.rs | 54 ++++++++-
 crates/parser/src/tests/prefix_entries.rs | 4 +
 .../parser/inline/ok/0011_field_expr.rast | 33 ++++++
 .../parser/inline/ok/0011_field_expr.rs | 2 +
 .../inline/ok/0107_method_call_expr.rast | 43 +++++++
 .../parser/inline/ok/0107_method_call_expr.rs | 2 +
 .../parser/inline/ok/0137_await_expr.rast | 35 ++++++
 .../parser/inline/ok/0137_await_expr.rs | 2 +
 13 files changed, 298 insertions(+), 39 deletions(-)

diff --git a/crates/parser/src/event.rs b/crates/parser/src/event.rs
index b0e70e7943..fb2616cf01 100644
--- a/crates/parser/src/event.rs
+++ b/crates/parser/src/event.rs
@@ -72,9 +72,12 @@ pub(crate) enum Event {
     /// `n_raw_tokens = 2` is used to produced a single `>>`.
     Token {
         kind: SyntaxKind,
+        // Consider custom enum here?
         n_raw_tokens: u8,
     },
-
+    FloatSplitHack {
+        has_pseudo_dot: bool,
+    },
     Error {
         msg: String,
     },
@@ -125,6 +128,11 @@ pub(super) fn process(mut events: Vec<Event>) -> Output {
             Event::Token { kind, n_raw_tokens } => {
                 res.token(kind, n_raw_tokens);
             }
+            Event::FloatSplitHack { has_pseudo_dot } => {
+                res.float_split_hack(has_pseudo_dot);
+                let ev = mem::replace(&mut events[i + 1], Event::tombstone());
+                assert!(matches!(ev, Event::Finish), "{ev:?}");
+            }
             Event::Error { msg } => res.error(msg),
         }
     }
diff --git a/crates/parser/src/grammar/expressions.rs b/crates/parser/src/grammar/expressions.rs
index 8932330b82..7516ac3c4b 100644
--- a/crates/parser/src/grammar/expressions.rs
+++ b/crates/parser/src/grammar/expressions.rs
@@ -379,7 +379,7 @@ fn postfix_expr(
         //     }
         T!['('] if allow_calls => call_expr(p, lhs),
         T!['['] if allow_calls => index_expr(p, lhs),
-        T![.] => match postfix_dot_expr(p, lhs) {
+        T![.] => match postfix_dot_expr::<false>(p, lhs) {
             Ok(it) => it,
             Err(it) => {
                 lhs = it;
@@ -393,35 +393,44 @@ fn postfix_expr(
         block_like = BlockLike::NotBlock;
     }
     return (lhs, block_like);
+}
 
-    fn postfix_dot_expr(
-        p: &mut Parser<'_>,
-        lhs: CompletedMarker,
-    ) -> Result<CompletedMarker, CompletedMarker> {
+fn postfix_dot_expr<const FLOAT_RECOVERY: bool>(
+    p: &mut Parser<'_>,
+    lhs: CompletedMarker,
+) -> Result<CompletedMarker, CompletedMarker> {
+    if !FLOAT_RECOVERY {
         assert!(p.at(T![.]));
-        if p.nth(1) == IDENT && (p.nth(2) == T!['('] || p.nth_at(2, T![::])) {
-            return Ok(method_call_expr(p, lhs));
-        }
-
-        // test await_expr
-        // fn foo() {
-        //     x.await;
-        //     x.0.await;
-        //     x.0().await?.hello();
-        // }
-        if p.nth(1) == T![await] {
-            let m = lhs.precede(p);
-            p.bump(T![.]);
-            p.bump(T![await]);
-            return Ok(m.complete(p, AWAIT_EXPR));
-        }
-
-        if p.at(T![..=]) || p.at(T![..]) {
-            return Err(lhs);
-        }
-
-        Ok(field_expr(p, lhs))
     }
+    let nth1 = if FLOAT_RECOVERY { 0 } else { 1 };
+    let nth2 = if FLOAT_RECOVERY { 1 } else { 2 };
+
+    if p.nth(nth1) == IDENT && (p.nth(nth2) == T!['('] || p.nth_at(nth2, T![::])) {
+        return Ok(method_call_expr::<FLOAT_RECOVERY>(p, lhs));
+    }
+
+    // test await_expr
+    // fn foo() {
+    //     x.await;
+    //     x.0.await;
+    //     x.0().await?.hello();
+    //     x.0.0.await;
+    //     x.0. await;
+    // }
+    if p.nth(nth1) == T![await] {
+        let m = lhs.precede(p);
+        if !FLOAT_RECOVERY {
+            p.bump(T![.]);
+        }
+        p.bump(T![await]);
+        return Ok(m.complete(p, AWAIT_EXPR));
+    }
+
+    if p.at(T![..=]) || p.at(T![..]) {
+        return Err(lhs);
+    }
+
+    field_expr::<FLOAT_RECOVERY>(p, lhs)
 }
 
 // test call_expr
@@ -455,11 +464,22 @@ fn index_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker {
 // fn foo() {
 //     x.foo();
 //     y.bar::<T>(1, 2,);
+//     x.0.0.call();
+//     x.0. call();
 // }
-fn method_call_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker {
-    assert!(p.at(T![.]) && p.nth(1) == IDENT && (p.nth(2) == T!['('] || p.nth_at(2, T![::])));
+fn method_call_expr<const FLOAT_RECOVERY: bool>(
+    p: &mut Parser<'_>,
+    lhs: CompletedMarker,
+) -> CompletedMarker {
+    if FLOAT_RECOVERY {
+        assert!(p.nth(0) == IDENT && (p.nth(1) == T!['('] || p.nth_at(1, T![::])));
+    } else {
+        assert!(p.at(T![.]) && p.nth(1) == IDENT && (p.nth(2) == T!['('] || p.nth_at(2, T![::])));
+    }
     let m = lhs.precede(p);
-    p.bump_any();
+    if !FLOAT_RECOVERY {
+        p.bump(T![.]);
+    }
     name_ref(p);
     generic_args::opt_generic_arg_list(p, true);
     if p.at(T!['(']) {
@@ -472,21 +492,35 @@ fn method_call_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker
 // fn foo() {
 //     x.foo;
 //     x.0.bar;
+//     x.0.1;
+//     x.0. bar;
 //     x.0();
 // }
-fn field_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker {
-    assert!(p.at(T![.]));
+fn field_expr<const FLOAT_RECOVERY: bool>(
+    p: &mut Parser<'_>,
+    lhs: CompletedMarker,
+) -> Result<CompletedMarker, CompletedMarker> {
+    if !FLOAT_RECOVERY {
+        assert!(p.at(T![.]));
+    }
     let m = lhs.precede(p);
-    p.bump(T![.]);
+    if !FLOAT_RECOVERY {
+        p.bump(T![.]);
+    }
     if p.at(IDENT) || p.at(INT_NUMBER) {
         name_ref_or_index(p);
     } else if p.at(FLOAT_NUMBER) {
-        // FIXME: How to recover and instead parse INT + T![.]?
-        p.bump_any();
+        return match p.split_float(m) {
+            (true, m) => {
+                let lhs = m.complete(p, FIELD_EXPR);
+                postfix_dot_expr::<true>(p, lhs)
+            }
+            (false, m) => Ok(m.complete(p, FIELD_EXPR)),
+        };
     } else {
         p.error("expected field name or number");
     }
-    m.complete(p, FIELD_EXPR)
+    Ok(m.complete(p, FIELD_EXPR))
 }
 
 // test try_expr
diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs
index 87be479277..f20d32d6cf 100644
--- a/crates/parser/src/lib.rs
+++ b/crates/parser/src/lib.rs
@@ -102,7 +102,7 @@ impl TopEntryPoint {
             match step {
                 Step::Enter { .. } => depth += 1,
                 Step::Exit => depth -= 1,
-                Step::Token { .. } | Step::Error { .. } => (),
+                Step::FloatSplit { .. } | Step::Token { .. } | Step::Error { .. } => (),
             }
         }
         assert!(!first, "no tree at all");
diff --git a/crates/parser/src/output.rs b/crates/parser/src/output.rs
index 3de6c0aba8..9587c8cb1b 100644
--- a/crates/parser/src/output.rs
+++ b/crates/parser/src/output.rs
@@ -25,6 +25,7 @@ pub struct Output {
 #[derive(Debug)]
 pub enum Step<'a> {
     Token { kind: SyntaxKind, n_input_tokens: u8 },
+    FloatSplit { has_pseudo_dot: bool },
     Enter { kind: SyntaxKind },
     Exit,
     Error { msg: &'a str },
@@ -44,6 +45,7 @@ impl Output {
     const TOKEN_EVENT: u8 = 0;
     const ENTER_EVENT: u8 = 1;
     const EXIT_EVENT: u8 = 2;
+    const SPLIT_EVENT: u8 = 3;
 
     pub fn iter(&self) -> impl Iterator<Item = Step<'_>> {
         self.event.iter().map(|&event| {
@@ -67,6 +69,9 @@ impl Output {
                     Step::Enter { kind }
                 }
                 Self::EXIT_EVENT => Step::Exit,
+                Self::SPLIT_EVENT => {
+                    Step::FloatSplit { has_pseudo_dot: event & Self::N_INPUT_TOKEN_MASK != 0 }
+                }
                 _ => unreachable!(),
             }
         })
@@ -79,6 +84,13 @@ impl Output {
         self.event.push(e)
     }
 
+    pub(crate) fn float_split_hack(&mut self, has_pseudo_dot: bool) {
+        let e = (Self::SPLIT_EVENT as u32) << Self::TAG_SHIFT
+            | ((has_pseudo_dot as u32) << Self::N_INPUT_TOKEN_SHIFT)
+            | Self::EVENT_MASK;
+        self.event.push(e);
+    }
+
     pub(crate) fn enter_node(&mut self, kind: SyntaxKind) {
         let e = ((kind as u16 as u32) << Self::KIND_SHIFT)
             | ((Self::ENTER_EVENT as u32) << Self::TAG_SHIFT)
diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs
index 48aecb35be..0f4fa60229 100644
--- a/crates/parser/src/parser.rs
+++ b/crates/parser/src/parser.rs
@@ -181,6 +181,38 @@ impl<'t> Parser<'t> {
         self.do_bump(kind, 1);
     }
 
+    /// Advances the parser by one token
+    pub(crate) fn split_float(&mut self, marker: Marker) -> (bool, Marker) {
+        assert!(self.at(SyntaxKind::FLOAT_NUMBER));
+        // we have parse `<something>.`
+        // `<something>`.0.1
+        // here we need to insert an extra event
+        //
+        // `<something>`. 0. 1;
+        // here we need to change the follow up parse, the return value will cause us to emulate a dot
+        // the actual splitting happens later
+        let has_pseudo_dot = !self.inp.is_joint(self.pos);
+        let marker = if !has_pseudo_dot {
+            let new_pos = self.start();
+            let idx = marker.pos as usize;
+            match &mut self.events[idx] {
+                Event::Start { forward_parent, kind } => {
+                    *kind = SyntaxKind::FIELD_EXPR;
+                    *forward_parent = Some(new_pos.pos - marker.pos);
+                }
+                _ => unreachable!(),
+            }
+            // NOTE: This brings the start / finish pairs out of balance!
+            std::mem::forget(marker);
+            new_pos
+        } else {
+            marker
+        };
+        self.pos += 1 as usize;
+        self.push_event(Event::FloatSplitHack { has_pseudo_dot });
+        (has_pseudo_dot, marker)
+    }
+
     /// Advances the parser by one token, remapping its kind.
     /// This is useful to create contextual keywords from
     /// identifiers.
For example, the lexer creates a `union` diff --git a/crates/parser/src/shortcuts.rs b/crates/parser/src/shortcuts.rs index 2be4050d13..18a6f838fa 100644 --- a/crates/parser/src/shortcuts.rs +++ b/crates/parser/src/shortcuts.rs @@ -44,7 +44,17 @@ impl<'a> LexedStr<'a> { } res.push(kind); } - was_joint = true; + if kind == SyntaxKind::FLOAT_NUMBER { + // we set jointness for floating point numbers as a hack to inform the + // parser about whether we have a `0.` or `0.1` style float + if self.text(i).split_once('.').map_or(false, |(_, it)| it.is_empty()) { + was_joint = false; + } else { + was_joint = true; + } + } else { + was_joint = true; + } } } res @@ -63,6 +73,7 @@ impl<'a> LexedStr<'a> { Step::Token { kind, n_input_tokens: n_raw_tokens } => { builder.token(kind, n_raw_tokens) } + Step::FloatSplit { has_pseudo_dot } => builder.float_split(has_pseudo_dot), Step::Enter { kind } => builder.enter(kind), Step::Exit => builder.exit(), Step::Error { msg } => { @@ -109,6 +120,16 @@ impl Builder<'_, '_> { self.do_token(kind, n_tokens as usize); } + fn float_split(&mut self, has_pseudo_dot: bool) { + match mem::replace(&mut self.state, State::Normal) { + State::PendingEnter => unreachable!(), + State::PendingExit => (self.sink)(StrStep::Exit), + State::Normal => (), + } + self.eat_trivias(); + self.do_float_split(has_pseudo_dot); + } + fn enter(&mut self, kind: SyntaxKind) { match mem::replace(&mut self.state, State::Normal) { State::PendingEnter => { @@ -164,6 +185,37 @@ impl Builder<'_, '_> { self.pos += n_tokens; (self.sink)(StrStep::Token { kind, text }); } + + fn do_float_split(&mut self, has_pseudo_dot: bool) { + let text = &self.lexed.range_text(self.pos..self.pos + 1); + self.pos += 1; + match text.split_once('.') { + Some((left, right)) => { + assert!(!left.is_empty()); + (self.sink)(StrStep::Enter { kind: SyntaxKind::NAME_REF }); + (self.sink)(StrStep::Token { kind: SyntaxKind::INT_NUMBER, text: left }); + (self.sink)(StrStep::Exit); + + // here we move the exit up, the original exit has been deleted in process + (self.sink)(StrStep::Exit); + + (self.sink)(StrStep::Token { kind: SyntaxKind::DOT, text: "." }); + + if has_pseudo_dot { + assert!(right.is_empty()); + self.state = State::Normal; + } else { + (self.sink)(StrStep::Enter { kind: SyntaxKind::NAME_REF }); + (self.sink)(StrStep::Token { kind: SyntaxKind::INT_NUMBER, text: right }); + (self.sink)(StrStep::Exit); + + // the parser creates an unbalanced start node, we are required to close it here + self.state = State::PendingExit; + } + } + None => unreachable!(), + } + } } fn n_attached_trivias<'a>( diff --git a/crates/parser/src/tests/prefix_entries.rs b/crates/parser/src/tests/prefix_entries.rs index e626b4f27e..40f92e5880 100644 --- a/crates/parser/src/tests/prefix_entries.rs +++ b/crates/parser/src/tests/prefix_entries.rs @@ -51,6 +51,9 @@ fn expr() { check(PrefixEntryPoint::Expr, "-1", "-1"); check(PrefixEntryPoint::Expr, "fn foo() {}", "fn"); check(PrefixEntryPoint::Expr, "#[attr] ()", "#[attr] ()"); + check(PrefixEntryPoint::Expr, "foo.0", "foo.0"); + check(PrefixEntryPoint::Expr, "foo.0.1", "foo.0.1"); + check(PrefixEntryPoint::Expr, "foo.0. foo", "foo.0. foo"); } #[test] @@ -88,6 +91,7 @@ fn check(entry: PrefixEntryPoint, input: &str, prefix: &str) { for step in entry.parse(&input).iter() { match step { Step::Token { n_input_tokens, .. } => n_tokens += n_input_tokens as usize, + Step::FloatSplit { .. } => n_tokens += 1, Step::Enter { .. } | Step::Exit | Step::Error { .. 
} => (), } } diff --git a/crates/parser/test_data/parser/inline/ok/0011_field_expr.rast b/crates/parser/test_data/parser/inline/ok/0011_field_expr.rast index 8498724b9e..dd27dc4896 100644 --- a/crates/parser/test_data/parser/inline/ok/0011_field_expr.rast +++ b/crates/parser/test_data/parser/inline/ok/0011_field_expr.rast @@ -40,6 +40,39 @@ SOURCE_FILE IDENT "bar" SEMICOLON ";" WHITESPACE "\n " + EXPR_STMT + FIELD_EXPR + FIELD_EXPR + PATH_EXPR + PATH + PATH_SEGMENT + NAME_REF + IDENT "x" + DOT "." + NAME_REF + INT_NUMBER "0" + DOT "." + NAME_REF + INT_NUMBER "1" + SEMICOLON ";" + WHITESPACE "\n " + EXPR_STMT + FIELD_EXPR + FIELD_EXPR + PATH_EXPR + PATH + PATH_SEGMENT + NAME_REF + IDENT "x" + DOT "." + NAME_REF + INT_NUMBER "0" + DOT "." + WHITESPACE " " + NAME_REF + IDENT "bar" + SEMICOLON ";" + WHITESPACE "\n " EXPR_STMT CALL_EXPR FIELD_EXPR diff --git a/crates/parser/test_data/parser/inline/ok/0011_field_expr.rs b/crates/parser/test_data/parser/inline/ok/0011_field_expr.rs index b8da2ddc30..98dbe45a7e 100644 --- a/crates/parser/test_data/parser/inline/ok/0011_field_expr.rs +++ b/crates/parser/test_data/parser/inline/ok/0011_field_expr.rs @@ -1,5 +1,7 @@ fn foo() { x.foo; x.0.bar; + x.0.1; + x.0. bar; x.0(); } diff --git a/crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rast b/crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rast index dcbcfe1231..b28b8eb673 100644 --- a/crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rast +++ b/crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rast @@ -58,6 +58,49 @@ SOURCE_FILE COMMA "," R_PAREN ")" SEMICOLON ";" + WHITESPACE "\n " + EXPR_STMT + METHOD_CALL_EXPR + FIELD_EXPR + FIELD_EXPR + PATH_EXPR + PATH + PATH_SEGMENT + NAME_REF + IDENT "x" + DOT "." + NAME_REF + INT_NUMBER "0" + DOT "." + NAME_REF + INT_NUMBER "0" + DOT "." + NAME_REF + IDENT "call" + ARG_LIST + L_PAREN "(" + R_PAREN ")" + SEMICOLON ";" + WHITESPACE "\n " + EXPR_STMT + METHOD_CALL_EXPR + FIELD_EXPR + PATH_EXPR + PATH + PATH_SEGMENT + NAME_REF + IDENT "x" + DOT "." + NAME_REF + INT_NUMBER "0" + DOT "." + WHITESPACE " " + NAME_REF + IDENT "call" + ARG_LIST + L_PAREN "(" + R_PAREN ")" + SEMICOLON ";" WHITESPACE "\n" R_CURLY "}" WHITESPACE "\n" diff --git a/crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rs b/crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rs index 1a3aa35ae8..48bb6381e8 100644 --- a/crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rs +++ b/crates/parser/test_data/parser/inline/ok/0107_method_call_expr.rs @@ -1,4 +1,6 @@ fn foo() { x.foo(); y.bar::(1, 2,); + x.0.0.call(); + x.0. call(); } diff --git a/crates/parser/test_data/parser/inline/ok/0137_await_expr.rast b/crates/parser/test_data/parser/inline/ok/0137_await_expr.rast index 9d37ada0da..af713a2207 100644 --- a/crates/parser/test_data/parser/inline/ok/0137_await_expr.rast +++ b/crates/parser/test_data/parser/inline/ok/0137_await_expr.rast @@ -65,6 +65,41 @@ SOURCE_FILE L_PAREN "(" R_PAREN ")" SEMICOLON ";" + WHITESPACE "\n " + EXPR_STMT + AWAIT_EXPR + FIELD_EXPR + FIELD_EXPR + PATH_EXPR + PATH + PATH_SEGMENT + NAME_REF + IDENT "x" + DOT "." + NAME_REF + INT_NUMBER "0" + DOT "." + NAME_REF + INT_NUMBER "0" + DOT "." + AWAIT_KW "await" + SEMICOLON ";" + WHITESPACE "\n " + EXPR_STMT + AWAIT_EXPR + FIELD_EXPR + PATH_EXPR + PATH + PATH_SEGMENT + NAME_REF + IDENT "x" + DOT "." + NAME_REF + INT_NUMBER "0" + DOT "." 
+ WHITESPACE " " + AWAIT_KW "await" + SEMICOLON ";" WHITESPACE "\n" R_CURLY "}" WHITESPACE "\n" diff --git a/crates/parser/test_data/parser/inline/ok/0137_await_expr.rs b/crates/parser/test_data/parser/inline/ok/0137_await_expr.rs index d2ba89ca60..fe9a3211bb 100644 --- a/crates/parser/test_data/parser/inline/ok/0137_await_expr.rs +++ b/crates/parser/test_data/parser/inline/ok/0137_await_expr.rs @@ -2,4 +2,6 @@ fn foo() { x.await; x.0.await; x.0().await?.hello(); + x.0.0.await; + x.0. await; } From 9053bcc65c41707e2272757fdccf3a97e167217d Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Fri, 3 Feb 2023 21:39:24 +0100 Subject: [PATCH 3/8] Make mbe compile with parser changes --- crates/mbe/src/syntax_bridge.rs | 1 + crates/mbe/src/tt_iter.rs | 12 ++++++++---- crates/tt/src/buffer.rs | 8 +++++--- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/crates/mbe/src/syntax_bridge.rs b/crates/mbe/src/syntax_bridge.rs index fbf6b53006..7fe4fcfc68 100644 --- a/crates/mbe/src/syntax_bridge.rs +++ b/crates/mbe/src/syntax_bridge.rs @@ -95,6 +95,7 @@ pub fn token_tree_to_syntax_node( parser::Step::Token { kind, n_input_tokens: n_raw_tokens } => { tree_sink.token(kind, n_raw_tokens) } + parser::Step::FloatSplit { .. } => tree_sink.token(SyntaxKind::FLOAT_NUMBER, 1), parser::Step::Enter { kind } => tree_sink.start_node(kind), parser::Step::Exit => tree_sink.finish_node(), parser::Step::Error { msg } => tree_sink.error(msg.to_string()), diff --git a/crates/mbe/src/tt_iter.rs b/crates/mbe/src/tt_iter.rs index e5f6b13722..b38243caee 100644 --- a/crates/mbe/src/tt_iter.rs +++ b/crates/mbe/src/tt_iter.rs @@ -140,6 +140,7 @@ impl<'a> TtIter<'a> { let mut cursor = buffer.begin(); let mut error = false; + let mut float_splits = vec![]; for step in tree_traversal.iter() { match step { parser::Step::Token { kind, mut n_input_tokens } => { @@ -150,6 +151,10 @@ impl<'a> TtIter<'a> { cursor = cursor.bump_subtree(); } } + parser::Step::FloatSplit { .. } => { + float_splits.push(cursor); + cursor = cursor.bump_subtree(); + } parser::Step::Enter { .. } | parser::Step::Exit => (), parser::Step::Error { .. } => error = true, } @@ -167,18 +172,17 @@ impl<'a> TtIter<'a> { if cursor.is_root() { while curr != cursor { if let Some(token) = curr.token_tree() { - res.push(token); + res.push(token.cloned()); } curr = curr.bump(); } } self.inner = self.inner.as_slice()[res.len()..].iter(); let res = match res.len() { - 1 => Some(res[0].cloned()), - 0 => None, + 0 | 1 => res.pop(), _ => Some(tt::TokenTree::Subtree(tt::Subtree { delimiter: tt::Delimiter::unspecified(), - token_trees: res.into_iter().map(|it| it.cloned()).collect(), + token_trees: res, })), }; ExpandResult { value: res, err } diff --git a/crates/tt/src/buffer.rs b/crates/tt/src/buffer.rs index 4484431124..c4b455e3f1 100644 --- a/crates/tt/src/buffer.rs +++ b/crates/tt/src/buffer.rs @@ -16,8 +16,8 @@ enum Entry<'t, Span> { // Mimicking types from proc-macro. Subtree(Option<&'t TokenTree>, &'t Subtree, EntryId), Leaf(&'t TokenTree), - // End entries contain a pointer to the entry from the containing - // token tree, or None if this is the outermost level. + /// End entries contain a pointer to the entry from the containing + /// token tree, or [`None`] if this is the outermost level. 
End(Option), } @@ -226,7 +226,9 @@ impl<'a, Span> Cursor<'a, Span> { /// a cursor into that subtree pub fn bump_subtree(self) -> Cursor<'a, Span> { match self.entry() { - Some(Entry::Subtree(_, _, _)) => self.subtree().unwrap(), + Some(&Entry::Subtree(_, _, entry_id)) => { + Cursor::create(self.buffer, EntryPtr(entry_id, 0)) + } _ => self.bump(), } } From c6e7917d6ed2ae36534b064a51697b54f497e02e Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 7 Feb 2023 15:21:37 +0100 Subject: [PATCH 4/8] Fix up token_tree_to_syntax_node float split handling --- crates/hir-def/src/item_tree.rs | 5 ++- .../src/macro_expansion_tests/proc_macros.rs | 7 ++-- crates/mbe/src/syntax_bridge.rs | 37 ++++++++++++++++++- crates/mbe/src/tt_iter.rs | 35 ++++++++++++++++-- crates/parser/src/lib.rs | 4 +- 5 files changed, 76 insertions(+), 12 deletions(-) diff --git a/crates/hir-def/src/item_tree.rs b/crates/hir-def/src/item_tree.rs index 3e1f7d4446..19d01630ef 100644 --- a/crates/hir-def/src/item_tree.rs +++ b/crates/hir-def/src/item_tree.rs @@ -111,7 +111,8 @@ impl ItemTree { Some(node) => node, None => return Default::default(), }; - if never!(syntax.kind() == SyntaxKind::ERROR) { + if never!(syntax.kind() == SyntaxKind::ERROR, "{:?} from {:?} {}", file_id, syntax, syntax) + { // FIXME: not 100% sure why these crop up, but return an empty tree to avoid a panic return Default::default(); } @@ -133,7 +134,7 @@ impl ItemTree { ctx.lower_macro_stmts(stmts) }, _ => { - panic!("cannot create item tree from {syntax:?} {syntax}"); + panic!("cannot create item tree for file {file_id:?} from {syntax:?} {syntax}"); }, } }; diff --git a/crates/hir-def/src/macro_expansion_tests/proc_macros.rs b/crates/hir-def/src/macro_expansion_tests/proc_macros.rs index 118c14ed84..822bdcc122 100644 --- a/crates/hir-def/src/macro_expansion_tests/proc_macros.rs +++ b/crates/hir-def/src/macro_expansion_tests/proc_macros.rs @@ -104,7 +104,7 @@ macro_rules! id { $($t)* }; } -id /*+errors*/! { +id! { #[proc_macros::identity] impl Foo for WrapBj { async fn foo(&self) { @@ -113,18 +113,17 @@ id /*+errors*/! { } } "#, - expect![[r##" + expect![[r#" macro_rules! id { ($($t:tt)*) => { $($t)* }; } -/* parse error: expected SEMICOLON */ #[proc_macros::identity] impl Foo for WrapBj { async fn foo(&self ) { self .0.id().await ; } } -"##]], +"#]], ); } diff --git a/crates/mbe/src/syntax_bridge.rs b/crates/mbe/src/syntax_bridge.rs index 7fe4fcfc68..8b9a3bca02 100644 --- a/crates/mbe/src/syntax_bridge.rs +++ b/crates/mbe/src/syntax_bridge.rs @@ -95,7 +95,7 @@ pub fn token_tree_to_syntax_node( parser::Step::Token { kind, n_input_tokens: n_raw_tokens } => { tree_sink.token(kind, n_raw_tokens) } - parser::Step::FloatSplit { .. 
} => tree_sink.token(SyntaxKind::FLOAT_NUMBER, 1), + parser::Step::FloatSplit { has_pseudo_dot } => tree_sink.float_split(has_pseudo_dot), parser::Step::Enter { kind } => tree_sink.start_node(kind), parser::Step::Exit => tree_sink.finish_node(), parser::Step::Error { msg } => tree_sink.error(msg.to_string()), @@ -797,6 +797,41 @@ fn delim_to_str(d: tt::DelimiterKind, closing: bool) -> Option<&'static str> { } impl<'a> TtTreeSink<'a> { + fn float_split(&mut self, has_pseudo_dot: bool) { + let (text, _span) = match self.cursor.token_tree() { + Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Literal(lit), _)) => { + (lit.text.as_str(), lit.span) + } + _ => unreachable!(), + }; + match text.split_once('.') { + Some((left, right)) => { + assert!(!left.is_empty()); + self.inner.start_node(SyntaxKind::NAME_REF); + self.inner.token(SyntaxKind::INT_NUMBER, left); + self.inner.finish_node(); + + // here we move the exit up, the original exit has been deleted in process + self.inner.finish_node(); + + self.inner.token(SyntaxKind::DOT, "."); + + if has_pseudo_dot { + assert!(right.is_empty()); + } else { + self.inner.start_node(SyntaxKind::NAME_REF); + self.inner.token(SyntaxKind::INT_NUMBER, right); + self.inner.finish_node(); + + // the parser creates an unbalanced start node, we are required to close it here + self.inner.finish_node(); + } + } + None => unreachable!(), + } + self.cursor = self.cursor.bump(); + } + fn token(&mut self, kind: SyntaxKind, mut n_tokens: u8) { if kind == LIFETIME_IDENT { n_tokens = 2; diff --git a/crates/mbe/src/tt_iter.rs b/crates/mbe/src/tt_iter.rs index b38243caee..c05a2ca150 100644 --- a/crates/mbe/src/tt_iter.rs +++ b/crates/mbe/src/tt_iter.rs @@ -170,11 +170,38 @@ impl<'a> TtIter<'a> { let mut res = vec![]; if cursor.is_root() { - while curr != cursor { - if let Some(token) = curr.token_tree() { - res.push(token.cloned()); + if float_splits.is_empty() { + while curr != cursor { + if let Some(token) = curr.token_tree() { + res.push(token.cloned()); + } + curr = curr.bump(); + } + } else { + // let mut float_splits = float_splits.into_iter().peekable(); + // while let Some(tt) = curr.token_tree() { + // let mut tt = tt.cloned(); + // let mut tt_mut_ref = &mut tt; + // if let Some(fs) = float_splits.peek() { + // loop { + // curr = curr.bump_subtree(); + // if curr == *fs { + // float_splits.next(); + // } + // if curr.is_root() { + // break; + // } + // } + // } + // res.push(tt); + // } + + while curr != cursor { + if let Some(token) = curr.token_tree() { + res.push(token.cloned()); + } + curr = curr.bump(); } - curr = curr.bump(); } } self.inner = self.inner.as_slice()[res.len()..].iter(); diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs index f20d32d6cf..6c72b5994b 100644 --- a/crates/parser/src/lib.rs +++ b/crates/parser/src/lib.rs @@ -102,10 +102,12 @@ impl TopEntryPoint { match step { Step::Enter { .. } => depth += 1, Step::Exit => depth -= 1, - Step::FloatSplit { .. } | Step::Token { .. } | Step::Error { .. } => (), + Step::FloatSplit { .. } => depth -= 1, + Step::Token { .. } | Step::Error { .. 
} => (), } } assert!(!first, "no tree at all"); + assert_eq!(depth, 0, "unbalanced tree"); } res From f6539b139e185c5bb08de5b8ff0275b47c70df43 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 7 Feb 2023 15:31:51 +0100 Subject: [PATCH 5/8] fix depth check for float split step --- crates/parser/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs index 6c72b5994b..9b895ff3ca 100644 --- a/crates/parser/src/lib.rs +++ b/crates/parser/src/lib.rs @@ -102,7 +102,7 @@ impl TopEntryPoint { match step { Step::Enter { .. } => depth += 1, Step::Exit => depth -= 1, - Step::FloatSplit { .. } => depth -= 1, + Step::FloatSplit { has_pseudo_dot } => depth -= 1 + !has_pseudo_dot as usize, Step::Token { .. } | Step::Error { .. } => (), } } From e59487de38b1be5b06b92eb4a9d30c0adb32d9db Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 7 Feb 2023 17:12:24 +0100 Subject: [PATCH 6/8] Add tests for float access macro call inputs --- .../hir-def/src/macro_expansion_tests/mbe.rs | 35 ++++++++++++++++ crates/mbe/src/syntax_bridge.rs | 2 +- crates/mbe/src/to_parser_input.rs | 4 ++ crates/mbe/src/tt_iter.rs | 41 ++++--------------- crates/tt/src/buffer.rs | 13 +++++- 5 files changed, 58 insertions(+), 37 deletions(-) diff --git a/crates/hir-def/src/macro_expansion_tests/mbe.rs b/crates/hir-def/src/macro_expansion_tests/mbe.rs index 2d5f2a692e..49bbc64bff 100644 --- a/crates/hir-def/src/macro_expansion_tests/mbe.rs +++ b/crates/hir-def/src/macro_expansion_tests/mbe.rs @@ -97,6 +97,41 @@ fn#19 main#20(#21)#21 {#22 "##]], ); } +#[test] +fn float_field_acces_macro_input() { + check( + r#" +macro_rules! foo { + ($expr:expr) => { + fn foo() { + $expr; + } + }; +} +foo!(x .0.1); +foo!(x .2. 3); +foo!(x .4 .5); +"#, + expect![[r#" +macro_rules! foo { + ($expr:expr) => { + fn foo() { + $expr; + } + }; +} +fn foo() { + (x.0.1); +} +fn foo() { + (x.2.3); +} +fn foo() { + (x.4.5); +} +"#]], + ); +} #[test] fn mbe_smoke_test() { diff --git a/crates/mbe/src/syntax_bridge.rs b/crates/mbe/src/syntax_bridge.rs index 8b9a3bca02..a4e3efaeb5 100644 --- a/crates/mbe/src/syntax_bridge.rs +++ b/crates/mbe/src/syntax_bridge.rs @@ -817,7 +817,7 @@ impl<'a> TtTreeSink<'a> { self.inner.token(SyntaxKind::DOT, "."); if has_pseudo_dot { - assert!(right.is_empty()); + assert!(right.is_empty(), "{left}.{right}"); } else { self.inner.start_node(SyntaxKind::NAME_REF); self.inner.token(SyntaxKind::INT_NUMBER, right); diff --git a/crates/mbe/src/to_parser_input.rs b/crates/mbe/src/to_parser_input.rs index d4c19b3ab8..6d20998bb4 100644 --- a/crates/mbe/src/to_parser_input.rs +++ b/crates/mbe/src/to_parser_input.rs @@ -45,6 +45,10 @@ pub(crate) fn to_parser_input(buffer: &TokenBuffer<'_>) -> parser::Input { .unwrap_or_else(|| panic!("Fail to convert given literal {:#?}", &lit)); res.push(kind); + + if kind == FLOAT_NUMBER && !inner_text.ends_with('.') { + res.was_joint(); + } } tt::Leaf::Ident(ident) => match ident.text.as_ref() { "_" => res.push(T![_]), diff --git a/crates/mbe/src/tt_iter.rs b/crates/mbe/src/tt_iter.rs index c05a2ca150..f744481f3a 100644 --- a/crates/mbe/src/tt_iter.rs +++ b/crates/mbe/src/tt_iter.rs @@ -140,7 +140,6 @@ impl<'a> TtIter<'a> { let mut cursor = buffer.begin(); let mut error = false; - let mut float_splits = vec![]; for step in tree_traversal.iter() { match step { parser::Step::Token { kind, mut n_input_tokens } => { @@ -152,7 +151,8 @@ impl<'a> TtIter<'a> { } } parser::Step::FloatSplit { .. 
} => { - float_splits.push(cursor); + // FIXME: We need to split the tree properly here, but mutating the token trees + // in the buffer is somewhat tricky to pull off. cursor = cursor.bump_subtree(); } parser::Step::Enter { .. } | parser::Step::Exit => (), @@ -170,40 +170,13 @@ impl<'a> TtIter<'a> { let mut res = vec![]; if cursor.is_root() { - if float_splits.is_empty() { - while curr != cursor { - if let Some(token) = curr.token_tree() { - res.push(token.cloned()); - } - curr = curr.bump(); - } - } else { - // let mut float_splits = float_splits.into_iter().peekable(); - // while let Some(tt) = curr.token_tree() { - // let mut tt = tt.cloned(); - // let mut tt_mut_ref = &mut tt; - // if let Some(fs) = float_splits.peek() { - // loop { - // curr = curr.bump_subtree(); - // if curr == *fs { - // float_splits.next(); - // } - // if curr.is_root() { - // break; - // } - // } - // } - // res.push(tt); - // } - - while curr != cursor { - if let Some(token) = curr.token_tree() { - res.push(token.cloned()); - } - curr = curr.bump(); - } + while curr != cursor { + let Some(token) = curr.token_tree() else { break }; + res.push(token.cloned()); + curr = curr.bump(); } } + self.inner = self.inner.as_slice()[res.len()..].iter(); let res = match res.len() { 0 | 1 => res.pop(), diff --git a/crates/tt/src/buffer.rs b/crates/tt/src/buffer.rs index c4b455e3f1..0615a3763d 100644 --- a/crates/tt/src/buffer.rs +++ b/crates/tt/src/buffer.rs @@ -7,7 +7,12 @@ use crate::{Leaf, Subtree, TokenTree}; struct EntryId(usize); #[derive(Copy, Clone, Debug, Eq, PartialEq)] -struct EntryPtr(EntryId, usize); +struct EntryPtr( + /// The index of the buffer containing the entry. + EntryId, + /// The index of the entry within the buffer. + usize, +); /// Internal type which is used instead of `TokenTree` to represent a token tree /// within a `TokenBuffer`. @@ -229,7 +234,11 @@ impl<'a, Span> Cursor<'a, Span> { Some(&Entry::Subtree(_, _, entry_id)) => { Cursor::create(self.buffer, EntryPtr(entry_id, 0)) } - _ => self.bump(), + Some(Entry::End(exit)) => match exit { + Some(exit) => Cursor::create(self.buffer, *exit), + None => self, + }, + _ => Cursor::create(self.buffer, EntryPtr(self.ptr.0, self.ptr.1 + 1)), } } From 27cd509558a0fd9b47d267e445097363eb9db8ff Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 7 Feb 2023 17:41:16 +0100 Subject: [PATCH 7/8] fix jointess for floats not being set properly --- crates/parser/src/shortcuts.rs | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/crates/parser/src/shortcuts.rs b/crates/parser/src/shortcuts.rs index 18a6f838fa..21939c3494 100644 --- a/crates/parser/src/shortcuts.rs +++ b/crates/parser/src/shortcuts.rs @@ -43,18 +43,16 @@ impl<'a> LexedStr<'a> { res.was_joint(); } res.push(kind); - } - if kind == SyntaxKind::FLOAT_NUMBER { // we set jointness for floating point numbers as a hack to inform the // parser about whether we have a `0.` or `0.1` style float - if self.text(i).split_once('.').map_or(false, |(_, it)| it.is_empty()) { - was_joint = false; - } else { - was_joint = true; + if kind == SyntaxKind::FLOAT_NUMBER { + if !self.text(i).split_once('.').map_or(true, |(_, it)| it.is_empty()) { + res.was_joint(); + } } - } else { - was_joint = true; } + + was_joint = true; } } res @@ -202,7 +200,7 @@ impl Builder<'_, '_> { (self.sink)(StrStep::Token { kind: SyntaxKind::DOT, text: "." 
}); if has_pseudo_dot { - assert!(right.is_empty()); + assert!(right.is_empty(), "{left}.{right}"); self.state = State::Normal; } else { (self.sink)(StrStep::Enter { kind: SyntaxKind::NAME_REF }); From a756c9ad0825d0a113e406adddda8629f2db1214 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 7 Feb 2023 18:08:05 +0100 Subject: [PATCH 8/8] Fixup comments --- crates/mbe/src/syntax_bridge.rs | 6 +++++- crates/mbe/src/to_parser_input.rs | 3 +++ crates/parser/src/event.rs | 11 +++++++---- crates/parser/src/lib.rs | 4 +++- crates/parser/src/output.rs | 8 ++++---- crates/parser/src/parser.rs | 21 +++++++++------------ crates/parser/src/shortcuts.rs | 11 +++++++---- 7 files changed, 38 insertions(+), 26 deletions(-) diff --git a/crates/mbe/src/syntax_bridge.rs b/crates/mbe/src/syntax_bridge.rs index a4e3efaeb5..fb53134010 100644 --- a/crates/mbe/src/syntax_bridge.rs +++ b/crates/mbe/src/syntax_bridge.rs @@ -95,7 +95,9 @@ pub fn token_tree_to_syntax_node( parser::Step::Token { kind, n_input_tokens: n_raw_tokens } => { tree_sink.token(kind, n_raw_tokens) } - parser::Step::FloatSplit { has_pseudo_dot } => tree_sink.float_split(has_pseudo_dot), + parser::Step::FloatSplit { ends_in_dot: has_pseudo_dot } => { + tree_sink.float_split(has_pseudo_dot) + } parser::Step::Enter { kind } => tree_sink.start_node(kind), parser::Step::Exit => tree_sink.finish_node(), parser::Step::Error { msg } => tree_sink.error(msg.to_string()), @@ -797,6 +799,8 @@ fn delim_to_str(d: tt::DelimiterKind, closing: bool) -> Option<&'static str> { } impl<'a> TtTreeSink<'a> { + /// Parses a float literal as if it was a one to two name ref nodes with a dot inbetween. + /// This occurs when a float literal is used as a field access. fn float_split(&mut self, has_pseudo_dot: bool) { let (text, _span) = match self.cursor.token_tree() { Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Literal(lit), _)) => { diff --git a/crates/mbe/src/to_parser_input.rs b/crates/mbe/src/to_parser_input.rs index 6d20998bb4..051e20b3a3 100644 --- a/crates/mbe/src/to_parser_input.rs +++ b/crates/mbe/src/to_parser_input.rs @@ -47,6 +47,9 @@ pub(crate) fn to_parser_input(buffer: &TokenBuffer<'_>) -> parser::Input { res.push(kind); if kind == FLOAT_NUMBER && !inner_text.ends_with('.') { + // Tag the token as joint if it is float with a fractional part + // we use this jointness to inform the parser about what token split + // event to emit when we encounter a float literal in a field access res.was_joint(); } } diff --git a/crates/parser/src/event.rs b/crates/parser/src/event.rs index fb2616cf01..577eb0967b 100644 --- a/crates/parser/src/event.rs +++ b/crates/parser/src/event.rs @@ -72,11 +72,14 @@ pub(crate) enum Event { /// `n_raw_tokens = 2` is used to produced a single `>>`. Token { kind: SyntaxKind, - // Consider custom enum here? n_raw_tokens: u8, }, + /// When we parse `foo.0.0` or `foo. 0. 0` the lexer will hand us a float literal + /// instead of an integer literal followed by a dot as the lexer has no contextual knowledge. + /// This event instructs whatever consumes the events to split the float literal into + /// the corresponding parts. 
FloatSplitHack { - has_pseudo_dot: bool, + ends_in_dot: bool, }, Error { msg: String, @@ -128,8 +131,8 @@ pub(super) fn process(mut events: Vec) -> Output { Event::Token { kind, n_raw_tokens } => { res.token(kind, n_raw_tokens); } - Event::FloatSplitHack { has_pseudo_dot } => { - res.float_split_hack(has_pseudo_dot); + Event::FloatSplitHack { ends_in_dot } => { + res.float_split_hack(ends_in_dot); let ev = mem::replace(&mut events[i + 1], Event::tombstone()); assert!(matches!(ev, Event::Finish), "{ev:?}"); } diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs index 9b895ff3ca..8c5aed0232 100644 --- a/crates/parser/src/lib.rs +++ b/crates/parser/src/lib.rs @@ -102,7 +102,9 @@ impl TopEntryPoint { match step { Step::Enter { .. } => depth += 1, Step::Exit => depth -= 1, - Step::FloatSplit { has_pseudo_dot } => depth -= 1 + !has_pseudo_dot as usize, + Step::FloatSplit { ends_in_dot: has_pseudo_dot } => { + depth -= 1 + !has_pseudo_dot as usize + } Step::Token { .. } | Step::Error { .. } => (), } } diff --git a/crates/parser/src/output.rs b/crates/parser/src/output.rs index 9587c8cb1b..41d4c68b2d 100644 --- a/crates/parser/src/output.rs +++ b/crates/parser/src/output.rs @@ -25,7 +25,7 @@ pub struct Output { #[derive(Debug)] pub enum Step<'a> { Token { kind: SyntaxKind, n_input_tokens: u8 }, - FloatSplit { has_pseudo_dot: bool }, + FloatSplit { ends_in_dot: bool }, Enter { kind: SyntaxKind }, Exit, Error { msg: &'a str }, @@ -70,7 +70,7 @@ impl Output { } Self::EXIT_EVENT => Step::Exit, Self::SPLIT_EVENT => { - Step::FloatSplit { has_pseudo_dot: event & Self::N_INPUT_TOKEN_MASK != 0 } + Step::FloatSplit { ends_in_dot: event & Self::N_INPUT_TOKEN_MASK != 0 } } _ => unreachable!(), } @@ -84,9 +84,9 @@ impl Output { self.event.push(e) } - pub(crate) fn float_split_hack(&mut self, has_pseudo_dot: bool) { + pub(crate) fn float_split_hack(&mut self, ends_in_dot: bool) { let e = (Self::SPLIT_EVENT as u32) << Self::TAG_SHIFT - | ((has_pseudo_dot as u32) << Self::N_INPUT_TOKEN_SHIFT) + | ((ends_in_dot as u32) << Self::N_INPUT_TOKEN_SHIFT) | Self::EVENT_MASK; self.event.push(e); } diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs index 0f4fa60229..280416ae7c 100644 --- a/crates/parser/src/parser.rs +++ b/crates/parser/src/parser.rs @@ -182,7 +182,7 @@ impl<'t> Parser<'t> { } /// Advances the parser by one token - pub(crate) fn split_float(&mut self, marker: Marker) -> (bool, Marker) { + pub(crate) fn split_float(&mut self, mut marker: Marker) -> (bool, Marker) { assert!(self.at(SyntaxKind::FLOAT_NUMBER)); // we have parse `.` // ``.0.1 @@ -191,26 +191,23 @@ impl<'t> Parser<'t> { // ``. 0. 1; // here we need to change the follow up parse, the return value will cause us to emulate a dot // the actual splitting happens later - let has_pseudo_dot = !self.inp.is_joint(self.pos); - let marker = if !has_pseudo_dot { - let new_pos = self.start(); + let ends_in_dot = !self.inp.is_joint(self.pos); + if !ends_in_dot { + let new_marker = self.start(); let idx = marker.pos as usize; match &mut self.events[idx] { Event::Start { forward_parent, kind } => { *kind = SyntaxKind::FIELD_EXPR; - *forward_parent = Some(new_pos.pos - marker.pos); + *forward_parent = Some(new_marker.pos - marker.pos); } _ => unreachable!(), } - // NOTE: This brings the start / finish pairs out of balance! 
- std::mem::forget(marker); - new_pos - } else { - marker + marker.bomb.defuse(); + marker = new_marker; }; self.pos += 1 as usize; - self.push_event(Event::FloatSplitHack { has_pseudo_dot }); - (has_pseudo_dot, marker) + self.push_event(Event::FloatSplitHack { ends_in_dot }); + (ends_in_dot, marker) } /// Advances the parser by one token, remapping its kind. diff --git a/crates/parser/src/shortcuts.rs b/crates/parser/src/shortcuts.rs index 21939c3494..47e4adcbbe 100644 --- a/crates/parser/src/shortcuts.rs +++ b/crates/parser/src/shortcuts.rs @@ -43,10 +43,11 @@ impl<'a> LexedStr<'a> { res.was_joint(); } res.push(kind); - // we set jointness for floating point numbers as a hack to inform the - // parser about whether we have a `0.` or `0.1` style float + // Tag the token as joint if it is float with a fractional part + // we use this jointness to inform the parser about what token split + // event to emit when we encounter a float literal in a field access if kind == SyntaxKind::FLOAT_NUMBER { - if !self.text(i).split_once('.').map_or(true, |(_, it)| it.is_empty()) { + if !self.text(i).ends_with('.') { res.was_joint(); } } @@ -71,7 +72,9 @@ impl<'a> LexedStr<'a> { Step::Token { kind, n_input_tokens: n_raw_tokens } => { builder.token(kind, n_raw_tokens) } - Step::FloatSplit { has_pseudo_dot } => builder.float_split(has_pseudo_dot), + Step::FloatSplit { ends_in_dot: has_pseudo_dot } => { + builder.float_split(has_pseudo_dot) + } Step::Enter { kind } => builder.enter(kind), Step::Exit => builder.exit(), Step::Error { msg } => {