Include a per-token edition in the parser input

As it turns out, this is necessary to determine the correct edition, because the edition is not global: it can differ from token to token.

The next commits will make use of it.
This commit is contained in:
Chayim Refael Friedman 2025-07-03 19:52:32 +03:00
parent 812cd91b9f
commit 1637ff777e
3 changed files with 29 additions and 20 deletions

View File

@ -1,5 +1,7 @@
//! See [`Input`].
use edition::Edition;
use crate::SyntaxKind;
#[allow(non_camel_case_types)]
@ -16,6 +18,7 @@ pub struct Input {
kind: Vec<SyntaxKind>,
joint: Vec<bits>,
contextual_kind: Vec<SyntaxKind>,
edition: Vec<Edition>,
}
/// `pub` impl used by callers to create `Tokens`.
@ -26,15 +29,16 @@ impl Input {
kind: Vec::with_capacity(capacity),
joint: Vec::with_capacity(capacity / size_of::<bits>()),
contextual_kind: Vec::with_capacity(capacity),
edition: Vec::with_capacity(capacity),
}
}
#[inline]
pub fn push(&mut self, kind: SyntaxKind) {
self.push_impl(kind, SyntaxKind::EOF)
pub fn push(&mut self, kind: SyntaxKind, edition: Edition) {
self.push_impl(kind, SyntaxKind::EOF, edition)
}
#[inline]
pub fn push_ident(&mut self, contextual_kind: SyntaxKind) {
self.push_impl(SyntaxKind::IDENT, contextual_kind)
pub fn push_ident(&mut self, contextual_kind: SyntaxKind, edition: Edition) {
self.push_impl(SyntaxKind::IDENT, contextual_kind, edition)
}
/// Sets jointness for the last token we've pushed.
///
@ -59,13 +63,14 @@ impl Input {
self.joint[idx] |= 1 << b_idx;
}
#[inline]
fn push_impl(&mut self, kind: SyntaxKind, contextual_kind: SyntaxKind) {
fn push_impl(&mut self, kind: SyntaxKind, contextual_kind: SyntaxKind, edition: Edition) {
let idx = self.len();
if idx.is_multiple_of(bits::BITS as usize) {
self.joint.push(0);
}
self.kind.push(kind);
self.contextual_kind.push(contextual_kind);
self.edition.push(edition);
}
}
@ -77,6 +82,9 @@ impl Input {
pub(crate) fn contextual_kind(&self, idx: usize) -> SyntaxKind {
self.contextual_kind.get(idx).copied().unwrap_or(SyntaxKind::EOF)
}
/// Returns the edition that was recorded for the token at `idx` when it was pushed.
///
/// # Panics
///
/// Indexes the `edition` vector directly, so this panics if `idx` is out of
/// bounds. Note that this differs from `contextual_kind`, which falls back to
/// `SyntaxKind::EOF` for an out-of-range index.
pub(crate) fn edition(&self, idx: usize) -> Edition {
self.edition[idx]
}
pub(crate) fn is_joint(&self, n: usize) -> bool {
let (idx, b_idx) = self.bit_index(n);
self.joint[idx] & (1 << b_idx) != 0

View File

@ -38,12 +38,13 @@ impl LexedStr<'_> {
res.push_ident(
SyntaxKind::from_contextual_keyword(token_text, edition)
.unwrap_or(SyntaxKind::IDENT),
edition,
)
} else {
if was_joint {
res.was_joint();
}
res.push(kind);
res.push(kind, edition);
// Tag the token as joint if it is float with a fractional part
// we use this jointness to inform the parser about what token split
// event to emit when we encounter a float literal in a field access

View File

@ -16,6 +16,8 @@ pub fn to_parser_input<Ctx: Copy + fmt::Debug + PartialEq + Eq + Hash>(
let mut current = buffer.cursor();
let mut syntax_context_to_edition_cache = FxHashMap::default();
let mut ctx_edition =
|ctx| *syntax_context_to_edition_cache.entry(ctx).or_insert_with(|| span_to_edition(ctx));
while !current.eof() {
let tt = current.token_tree();
@ -26,8 +28,8 @@ pub fn to_parser_input<Ctx: Copy + fmt::Debug + PartialEq + Eq + Hash>(
{
current.bump();
match current.token_tree() {
Some(tt::TokenTree::Leaf(tt::Leaf::Ident(_ident))) => {
res.push(LIFETIME_IDENT);
Some(tt::TokenTree::Leaf(tt::Leaf::Ident(ident))) => {
res.push(LIFETIME_IDENT, ctx_edition(ident.span.ctx));
current.bump();
continue;
}
@ -51,7 +53,7 @@ pub fn to_parser_input<Ctx: Copy + fmt::Debug + PartialEq + Eq + Hash>(
tt::LitKind::CStr | tt::LitKind::CStrRaw(_) => SyntaxKind::C_STRING,
tt::LitKind::Err(_) => SyntaxKind::ERROR,
};
res.push(kind);
res.push(kind, ctx_edition(lit.span.ctx));
if kind == FLOAT_NUMBER && !lit.symbol.as_str().ends_with('.') {
// Tag the token as joint if it is float with a fractional part
@ -61,20 +63,18 @@ pub fn to_parser_input<Ctx: Copy + fmt::Debug + PartialEq + Eq + Hash>(
}
}
tt::Leaf::Ident(ident) => {
let edition = *syntax_context_to_edition_cache
.entry(ident.span.ctx)
.or_insert_with(|| span_to_edition(ident.span.ctx));
let edition = ctx_edition(ident.span.ctx);
match ident.sym.as_str() {
"_" => res.push(T![_]),
i if i.starts_with('\'') => res.push(LIFETIME_IDENT),
_ if ident.is_raw.yes() => res.push(IDENT),
"_" => res.push(T![_], edition),
i if i.starts_with('\'') => res.push(LIFETIME_IDENT, edition),
_ if ident.is_raw.yes() => res.push(IDENT, edition),
text => match SyntaxKind::from_keyword(text, edition) {
Some(kind) => res.push(kind),
Some(kind) => res.push(kind, edition),
None => {
let contextual_keyword =
SyntaxKind::from_contextual_keyword(text, edition)
.unwrap_or(SyntaxKind::IDENT);
res.push_ident(contextual_keyword);
res.push_ident(contextual_keyword, edition);
}
},
}
@ -82,7 +82,7 @@ pub fn to_parser_input<Ctx: Copy + fmt::Debug + PartialEq + Eq + Hash>(
tt::Leaf::Punct(punct) => {
let kind = SyntaxKind::from_char(punct.char)
.unwrap_or_else(|| panic!("{punct:#?} is not a valid punct"));
res.push(kind);
res.push(kind, ctx_edition(punct.span.ctx));
if punct.spacing == tt::Spacing::Joint {
res.was_joint();
}
@ -97,7 +97,7 @@ pub fn to_parser_input<Ctx: Copy + fmt::Debug + PartialEq + Eq + Hash>(
tt::DelimiterKind::Bracket => Some(T!['[']),
tt::DelimiterKind::Invisible => None,
} {
res.push(kind);
res.push(kind, ctx_edition(subtree.delimiter.open.ctx));
}
current.bump();
}
@ -109,7 +109,7 @@ pub fn to_parser_input<Ctx: Copy + fmt::Debug + PartialEq + Eq + Hash>(
tt::DelimiterKind::Bracket => Some(T![']']),
tt::DelimiterKind::Invisible => None,
} {
res.push(kind);
res.push(kind, ctx_edition(subtree.delimiter.close.ctx));
}
}
};