From 1637ff777efeb421ab992e0a2a80374adbbad4f3 Mon Sep 17 00:00:00 2001 From: Chayim Refael Friedman Date: Thu, 3 Jul 2025 19:52:32 +0300 Subject: [PATCH] Include a per-token edition in the parser input Because, as it turns out, this is necessary to determine the correct edition (it is not global). The next commits will make use of it. --- crates/parser/src/input.rs | 18 +++++++++---- crates/parser/src/shortcuts.rs | 3 ++- crates/syntax-bridge/src/to_parser_input.rs | 28 ++++++++++----------- 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/crates/parser/src/input.rs b/crates/parser/src/input.rs index 331bc58dd0..57eeb431cd 100644 --- a/crates/parser/src/input.rs +++ b/crates/parser/src/input.rs @@ -1,5 +1,7 @@ //! See [`Input`]. +use edition::Edition; + use crate::SyntaxKind; #[allow(non_camel_case_types)] @@ -16,6 +18,7 @@ pub struct Input { kind: Vec, joint: Vec, contextual_kind: Vec, + edition: Vec, } /// `pub` impl used by callers to create `Tokens`. @@ -26,15 +29,16 @@ impl Input { kind: Vec::with_capacity(capacity), joint: Vec::with_capacity(capacity / size_of::()), contextual_kind: Vec::with_capacity(capacity), + edition: Vec::with_capacity(capacity), } } #[inline] - pub fn push(&mut self, kind: SyntaxKind) { - self.push_impl(kind, SyntaxKind::EOF) + pub fn push(&mut self, kind: SyntaxKind, edition: Edition) { + self.push_impl(kind, SyntaxKind::EOF, edition) } #[inline] - pub fn push_ident(&mut self, contextual_kind: SyntaxKind) { - self.push_impl(SyntaxKind::IDENT, contextual_kind) + pub fn push_ident(&mut self, contextual_kind: SyntaxKind, edition: Edition) { + self.push_impl(SyntaxKind::IDENT, contextual_kind, edition) } /// Sets jointness for the last token we've pushed. /// @@ -59,13 +63,14 @@ impl Input { self.joint[idx] |= 1 << b_idx; } #[inline] - fn push_impl(&mut self, kind: SyntaxKind, contextual_kind: SyntaxKind) { + fn push_impl(&mut self, kind: SyntaxKind, contextual_kind: SyntaxKind, edition: Edition) { let idx = self.len(); if idx.is_multiple_of(bits::BITS as usize) { self.joint.push(0); } self.kind.push(kind); self.contextual_kind.push(contextual_kind); + self.edition.push(edition); } } @@ -77,6 +82,9 @@ impl Input { pub(crate) fn contextual_kind(&self, idx: usize) -> SyntaxKind { self.contextual_kind.get(idx).copied().unwrap_or(SyntaxKind::EOF) } + pub(crate) fn edition(&self, idx: usize) -> Edition { + self.edition[idx] + } pub(crate) fn is_joint(&self, n: usize) -> bool { let (idx, b_idx) = self.bit_index(n); self.joint[idx] & (1 << b_idx) != 0 diff --git a/crates/parser/src/shortcuts.rs b/crates/parser/src/shortcuts.rs index d5e513933f..3c19e02545 100644 --- a/crates/parser/src/shortcuts.rs +++ b/crates/parser/src/shortcuts.rs @@ -38,12 +38,13 @@ impl LexedStr<'_> { res.push_ident( SyntaxKind::from_contextual_keyword(token_text, edition) .unwrap_or(SyntaxKind::IDENT), + edition, ) } else { if was_joint { res.was_joint(); } - res.push(kind); + res.push(kind, edition); // Tag the token as joint if it is float with a fractional part // we use this jointness to inform the parser about what token split // event to emit when we encounter a float literal in a field access diff --git a/crates/syntax-bridge/src/to_parser_input.rs b/crates/syntax-bridge/src/to_parser_input.rs index c0ff8e1db2..5922994c08 100644 --- a/crates/syntax-bridge/src/to_parser_input.rs +++ b/crates/syntax-bridge/src/to_parser_input.rs @@ -16,6 +16,8 @@ pub fn to_parser_input( let mut current = buffer.cursor(); let mut syntax_context_to_edition_cache = FxHashMap::default(); + let mut ctx_edition = + |ctx| *syntax_context_to_edition_cache.entry(ctx).or_insert_with(|| span_to_edition(ctx)); while !current.eof() { let tt = current.token_tree(); @@ -26,8 +28,8 @@ pub fn to_parser_input( { current.bump(); match current.token_tree() { - Some(tt::TokenTree::Leaf(tt::Leaf::Ident(_ident))) => { - res.push(LIFETIME_IDENT); + Some(tt::TokenTree::Leaf(tt::Leaf::Ident(ident))) => { + res.push(LIFETIME_IDENT, ctx_edition(ident.span.ctx)); current.bump(); continue; } @@ -51,7 +53,7 @@ pub fn to_parser_input( tt::LitKind::CStr | tt::LitKind::CStrRaw(_) => SyntaxKind::C_STRING, tt::LitKind::Err(_) => SyntaxKind::ERROR, }; - res.push(kind); + res.push(kind, ctx_edition(lit.span.ctx)); if kind == FLOAT_NUMBER && !lit.symbol.as_str().ends_with('.') { // Tag the token as joint if it is float with a fractional part @@ -61,20 +63,18 @@ pub fn to_parser_input( } } tt::Leaf::Ident(ident) => { - let edition = *syntax_context_to_edition_cache - .entry(ident.span.ctx) - .or_insert_with(|| span_to_edition(ident.span.ctx)); + let edition = ctx_edition(ident.span.ctx); match ident.sym.as_str() { - "_" => res.push(T![_]), - i if i.starts_with('\'') => res.push(LIFETIME_IDENT), - _ if ident.is_raw.yes() => res.push(IDENT), + "_" => res.push(T![_], edition), + i if i.starts_with('\'') => res.push(LIFETIME_IDENT, edition), + _ if ident.is_raw.yes() => res.push(IDENT, edition), text => match SyntaxKind::from_keyword(text, edition) { - Some(kind) => res.push(kind), + Some(kind) => res.push(kind, edition), None => { let contextual_keyword = SyntaxKind::from_contextual_keyword(text, edition) .unwrap_or(SyntaxKind::IDENT); - res.push_ident(contextual_keyword); + res.push_ident(contextual_keyword, edition); } }, } @@ -82,7 +82,7 @@ pub fn to_parser_input( tt::Leaf::Punct(punct) => { let kind = SyntaxKind::from_char(punct.char) .unwrap_or_else(|| panic!("{punct:#?} is not a valid punct")); - res.push(kind); + res.push(kind, ctx_edition(punct.span.ctx)); if punct.spacing == tt::Spacing::Joint { res.was_joint(); } @@ -97,7 +97,7 @@ pub fn to_parser_input( tt::DelimiterKind::Bracket => Some(T!['[']), tt::DelimiterKind::Invisible => None, } { - res.push(kind); + res.push(kind, ctx_edition(subtree.delimiter.open.ctx)); } current.bump(); } @@ -109,7 +109,7 @@ pub fn to_parser_input( tt::DelimiterKind::Bracket => Some(T![']']), tt::DelimiterKind::Invisible => None, } { - res.push(kind); + res.push(kind, ctx_edition(subtree.delimiter.close.ctx)); } } };