From 288ca5dc67ca3ed902821f07ae866478d9a56cb5 Mon Sep 17 00:00:00 2001 From: Chayim Refael Friedman Date: Mon, 29 Dec 2025 10:54:10 +0200 Subject: [PATCH] Compress token trees for best memory usage --- Cargo.lock | 2 + crates/syntax-bridge/src/lib.rs | 13 +- crates/syntax-bridge/src/tests.rs | 4 +- crates/tt/Cargo.toml | 2 + crates/tt/src/buffer.rs | 37 +- crates/tt/src/iter.rs | 94 ++- crates/tt/src/lib.rs | 552 +++++------------ crates/tt/src/storage.rs | 992 ++++++++++++++++++++++++++++++ 8 files changed, 1259 insertions(+), 437 deletions(-) create mode 100644 crates/tt/src/storage.rs diff --git a/Cargo.lock b/Cargo.lock index 10927728a1..42eaeb01f1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3085,8 +3085,10 @@ name = "tt" version = "0.0.0" dependencies = [ "arrayvec", + "indexmap", "intern", "ra-ap-rustc_lexer", + "rustc-hash 2.1.1", "span", "stdx", "text-size 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/crates/syntax-bridge/src/lib.rs b/crates/syntax-bridge/src/lib.rs index ce238eb932..0dcf18a4ad 100644 --- a/crates/syntax-bridge/src/lib.rs +++ b/crates/syntax-bridge/src/lib.rs @@ -866,7 +866,8 @@ impl TtTreeSink<'_> { /// Parses a float literal as if it was a one to two name ref nodes with a dot inbetween. /// This occurs when a float literal is used as a field access. fn float_split(&mut self, has_pseudo_dot: bool) { - let (text, span) = match self.cursor.token_tree() { + let token_tree = self.cursor.token_tree(); + let (text, span) = match &token_tree { Some(tt::TokenTree::Leaf(tt::Leaf::Literal( lit @ tt::Literal { span, kind: tt::LitKind::Float, .. }, ))) => (lit.text(), *span), @@ -928,9 +929,15 @@ impl TtTreeSink<'_> { self.buf.push_str("r#"); self.text_pos += TextSize::of("r#"); } - let r = (ident.sym.as_str(), ident.span); + let text = ident.sym.as_str(); + self.buf += text; + self.text_pos += TextSize::of(text); + combined_span = match combined_span { + None => Some(ident.span), + Some(prev_span) => Some(Self::merge_spans(prev_span, ident.span)), + }; self.cursor.bump(); - r + continue 'tokens; } tt::Leaf::Punct(punct) => { assert!(punct.char.is_ascii()); diff --git a/crates/syntax-bridge/src/tests.rs b/crates/syntax-bridge/src/tests.rs index 8c28e1c5aa..16f2498bf3 100644 --- a/crates/syntax-bridge/src/tests.rs +++ b/crates/syntax-bridge/src/tests.rs @@ -36,9 +36,9 @@ fn check_punct_spacing(fixture: &str) { if let tt::TokenTree::Leaf(Leaf::Punct(Punct { spacing, span: Span { range, .. }, .. })) = token_tree - && let Some(expected) = annotations.remove(range) + && let Some(expected) = annotations.remove(&range) { - assert_eq!(expected, *spacing); + assert_eq!(expected, spacing); } cursor.bump(); } diff --git a/crates/tt/Cargo.toml b/crates/tt/Cargo.toml index 4b38bfb1e5..6cfb76400e 100644 --- a/crates/tt/Cargo.toml +++ b/crates/tt/Cargo.toml @@ -15,6 +15,8 @@ doctest = false [dependencies] arrayvec.workspace = true text-size.workspace = true +rustc-hash.workspace = true +indexmap.workspace = true span = { path = "../span", version = "0.0", default-features = false } stdx.workspace = true diff --git a/crates/tt/src/buffer.rs b/crates/tt/src/buffer.rs index de6379b5cd..78cf4b956d 100644 --- a/crates/tt/src/buffer.rs +++ b/crates/tt/src/buffer.rs @@ -1,17 +1,17 @@ //! Stateful iteration over token trees. //! //! We use this as the source of tokens for parser. -use crate::{Leaf, Subtree, TokenTree, TokenTreesView}; +use crate::{Leaf, Subtree, TokenTree, TokenTreesView, dispatch_ref}; pub struct Cursor<'a> { - buffer: &'a [TokenTree], + buffer: TokenTreesView<'a>, index: usize, subtrees_stack: Vec, } impl<'a> Cursor<'a> { pub fn new(buffer: TokenTreesView<'a>) -> Self { - Self { buffer: buffer.0, index: 0, subtrees_stack: Vec::new() } + Self { buffer, index: 0, subtrees_stack: Vec::new() } } /// Check whether it is eof @@ -23,16 +23,22 @@ impl<'a> Cursor<'a> { self.subtrees_stack.is_empty() } - fn last_subtree(&self) -> Option<(usize, &'a Subtree)> { + fn at(&self, idx: usize) -> Option { + dispatch_ref! { + match self.buffer.repr => tt => Some(tt.get(idx)?.to_api(self.buffer.span_parts)) + } + } + + fn last_subtree(&self) -> Option<(usize, Subtree)> { self.subtrees_stack.last().map(|&subtree_idx| { - let TokenTree::Subtree(subtree) = &self.buffer[subtree_idx] else { + let Some(TokenTree::Subtree(subtree)) = self.at(subtree_idx) else { panic!("subtree pointing to non-subtree"); }; (subtree_idx, subtree) }) } - pub fn end(&mut self) -> &'a Subtree { + pub fn end(&mut self) -> Subtree { let (last_subtree_idx, last_subtree) = self.last_subtree().expect("called `Cursor::end()` without an open subtree"); // +1 because `Subtree.len` excludes the subtree itself. @@ -46,14 +52,14 @@ impl<'a> Cursor<'a> { } /// Returns the `TokenTree` at the cursor if it is not at the end of a subtree. - pub fn token_tree(&self) -> Option<&'a TokenTree> { + pub fn token_tree(&self) -> Option { if let Some((last_subtree_idx, last_subtree)) = self.last_subtree() { // +1 because `Subtree.len` excludes the subtree itself. if last_subtree_idx + last_subtree.usize_len() + 1 == self.index { return None; } } - self.buffer.get(self.index) + self.at(self.index) } /// Bump the cursor, and enters a subtree if it is on one. @@ -66,7 +72,7 @@ impl<'a> Cursor<'a> { "called `Cursor::bump()` when at the end of a subtree" ); } - if let TokenTree::Subtree(_) = self.buffer[self.index] { + if let Some(TokenTree::Subtree(_)) = self.at(self.index) { self.subtrees_stack.push(self.index); } self.index += 1; @@ -81,13 +87,13 @@ impl<'a> Cursor<'a> { } } // +1 because `Subtree.len` excludes the subtree itself. - if let TokenTree::Subtree(_) = self.buffer[self.index] { + if let Some(TokenTree::Subtree(_)) = self.at(self.index) { self.subtrees_stack.push(self.index); } self.index += 1; } - pub fn peek_two_leaves(&self) -> Option<[&'a Leaf; 2]> { + pub fn peek_two_leaves(&self) -> Option<[Leaf; 2]> { if let Some((last_subtree_idx, last_subtree)) = self.last_subtree() { // +1 because `Subtree.len` excludes the subtree itself. let last_end = last_subtree_idx + last_subtree.usize_len() + 1; @@ -95,14 +101,17 @@ impl<'a> Cursor<'a> { return None; } } - self.buffer.get(self.index..self.index + 2).and_then(|it| match it { - [TokenTree::Leaf(a), TokenTree::Leaf(b)] => Some([a, b]), + self.at(self.index).zip(self.at(self.index + 1)).and_then(|it| match it { + (TokenTree::Leaf(a), TokenTree::Leaf(b)) => Some([a, b]), _ => None, }) } pub fn crossed(&self) -> TokenTreesView<'a> { assert!(self.is_root()); - TokenTreesView::new(&self.buffer[..self.index]) + TokenTreesView { + repr: self.buffer.repr.get(..self.index).unwrap(), + span_parts: self.buffer.span_parts, + } } } diff --git a/crates/tt/src/iter.rs b/crates/tt/src/iter.rs index 5ab9f94b63..7caacd40dd 100644 --- a/crates/tt/src/iter.rs +++ b/crates/tt/src/iter.rs @@ -7,11 +7,14 @@ use arrayvec::ArrayVec; use intern::sym; use span::Span; -use crate::{Ident, Leaf, MAX_GLUED_PUNCT_LEN, Punct, Spacing, Subtree, TokenTree, TokenTreesView}; +use crate::{ + Ident, Leaf, MAX_GLUED_PUNCT_LEN, Punct, Spacing, Subtree, TokenTree, TokenTreesReprRef, + TokenTreesView, dispatch_ref, +}; #[derive(Clone)] pub struct TtIter<'a> { - inner: std::slice::Iter<'a, TokenTree>, + inner: TokenTreesView<'a>, } impl fmt::Debug for TtIter<'_> { @@ -21,17 +24,17 @@ impl fmt::Debug for TtIter<'_> { } #[derive(Clone, Copy)] -pub struct TtIterSavepoint<'a>(&'a [TokenTree]); +pub struct TtIterSavepoint<'a>(TokenTreesView<'a>); impl<'a> TtIterSavepoint<'a> { pub fn remaining(self) -> TokenTreesView<'a> { - TokenTreesView::new(self.0) + self.0 } } impl<'a> TtIter<'a> { - pub(crate) fn new(tt: &'a [TokenTree]) -> TtIter<'a> { - TtIter { inner: tt.iter() } + pub(crate) fn new(tt: TokenTreesView<'a>) -> TtIter<'a> { + TtIter { inner: tt } } pub fn expect_char(&mut self, char: char) -> Result<(), ()> { @@ -141,8 +144,8 @@ impl<'a> TtIter<'a> { let _ = self.next().unwrap(); let _ = self.next().unwrap(); res.push(first); - res.push(*second); - res.push(*third.unwrap()); + res.push(second); + res.push(third.unwrap()); } ('-' | '!' | '*' | '/' | '&' | '%' | '^' | '+' | '<' | '=' | '>' | '|', '=', _) | ('-' | '=' | '>', '>', _) @@ -154,7 +157,7 @@ impl<'a> TtIter<'a> { | ('|', '|', _) => { let _ = self.next().unwrap(); res.push(first); - res.push(*second); + res.push(second); } _ => res.push(first), } @@ -162,17 +165,21 @@ impl<'a> TtIter<'a> { } /// This method won't check for subtrees, so the nth token tree may not be the nth sibling of the current tree. - fn peek_n(&self, n: usize) -> Option<&'a TokenTree> { - self.inner.as_slice().get(n) + fn peek_n(&self, n: usize) -> Option { + dispatch_ref! { + match self.inner.repr => tt => Some(tt.get(n)?.to_api(self.inner.span_parts)) + } } pub fn peek(&self) -> Option> { - match self.inner.as_slice().first()? { - TokenTree::Leaf(leaf) => Some(TtElement::Leaf(leaf.clone())), + match self.peek_n(0)? { + TokenTree::Leaf(leaf) => Some(TtElement::Leaf(leaf)), TokenTree::Subtree(subtree) => { - let nested_iter = - TtIter { inner: self.inner.as_slice()[1..][..subtree.usize_len()].iter() }; - Some(TtElement::Subtree(*subtree, nested_iter)) + let nested_repr = self.inner.repr.get(1..subtree.usize_len() + 1).unwrap(); + let nested_iter = TtIter { + inner: TokenTreesView { repr: nested_repr, span_parts: self.inner.span_parts }, + }; + Some(TtElement::Subtree(subtree, nested_iter)) } } } @@ -183,26 +190,51 @@ impl<'a> TtIter<'a> { } pub fn next_span(&self) -> Option { - Some(self.inner.as_slice().first()?.first_span()) + Some(self.peek()?.first_span()) } pub fn remaining(&self) -> TokenTreesView<'a> { - TokenTreesView::new(self.inner.as_slice()) + self.inner } /// **Warning**: This advances `skip` **flat** token trees, subtrees account for children+1! pub fn flat_advance(&mut self, skip: usize) { - self.inner = self.inner.as_slice()[skip..].iter(); + self.inner.repr = self.inner.repr.get(skip..).unwrap(); } pub fn savepoint(&self) -> TtIterSavepoint<'a> { - TtIterSavepoint(self.inner.as_slice()) + TtIterSavepoint(self.inner) } pub fn from_savepoint(&self, savepoint: TtIterSavepoint<'a>) -> TokenTreesView<'a> { - let len = (self.inner.as_slice().as_ptr() as usize - savepoint.0.as_ptr() as usize) - / size_of::(); - TokenTreesView::new(&savepoint.0[..len]) + let len = match (self.inner.repr, savepoint.0.repr) { + ( + TokenTreesReprRef::SpanStorage32(this), + TokenTreesReprRef::SpanStorage32(savepoint), + ) => { + (this.as_ptr() as usize - savepoint.as_ptr() as usize) + / size_of::>() + } + ( + TokenTreesReprRef::SpanStorage64(this), + TokenTreesReprRef::SpanStorage64(savepoint), + ) => { + (this.as_ptr() as usize - savepoint.as_ptr() as usize) + / size_of::>() + } + ( + TokenTreesReprRef::SpanStorage96(this), + TokenTreesReprRef::SpanStorage96(savepoint), + ) => { + (this.as_ptr() as usize - savepoint.as_ptr() as usize) + / size_of::>() + } + _ => panic!("savepoint did not originate from this TtIter"), + }; + TokenTreesView { + repr: savepoint.0.repr.get(..len).unwrap(), + span_parts: savepoint.0.span_parts, + } } pub fn next_as_view(&mut self) -> Option> { @@ -242,14 +274,12 @@ impl TtElement<'_> { impl<'a> Iterator for TtIter<'a> { type Item = TtElement<'a>; fn next(&mut self) -> Option { - match self.inner.next()? { - TokenTree::Leaf(leaf) => Some(TtElement::Leaf(leaf.clone())), - TokenTree::Subtree(subtree) => { - let nested_iter = - TtIter { inner: self.inner.as_slice()[..subtree.usize_len()].iter() }; - self.inner = self.inner.as_slice()[subtree.usize_len()..].iter(); - Some(TtElement::Subtree(*subtree, nested_iter)) - } - } + let result = self.peek()?; + let skip = match &result { + TtElement::Leaf(_) => 1, + TtElement::Subtree(subtree, _) => subtree.usize_len() + 1, + }; + self.inner.repr = self.inner.repr.get(skip..).unwrap(); + Some(result) } } diff --git a/crates/tt/src/lib.rs b/crates/tt/src/lib.rs index 91fcec9327..a59fc2e089 100644 --- a/crates/tt/src/lib.rs +++ b/crates/tt/src/lib.rs @@ -15,8 +15,9 @@ extern crate rustc_lexer; pub mod buffer; pub mod iter; +mod storage; -use std::fmt; +use std::{fmt, slice::SliceIndex}; use arrayvec::ArrayString; use buffer::Cursor; @@ -26,7 +27,10 @@ use stdx::{impl_from, itertools::Itertools as _}; pub use span::Span; pub use text_size::{TextRange, TextSize}; +use crate::storage::{CompressedSpanPart, SpanStorage}; + pub use self::iter::{TtElement, TtIter}; +pub use self::storage::{TopSubtree, TopSubtreeBuilder}; pub const MAX_GLUED_PUNCT_LEN: usize = 3; @@ -125,267 +129,71 @@ impl Subtree { } } -#[derive(Clone, PartialEq, Eq, Hash)] -pub struct TopSubtree(Box<[TokenTree]>); +/// `dispatch_ref! {}` +macro_rules! dispatch_ref { + ( + match $scrutinee:expr => $tt:ident => $body:expr + ) => { + match $scrutinee { + $crate::TokenTreesReprRef::SpanStorage32($tt) => $body, + $crate::TokenTreesReprRef::SpanStorage64($tt) => $body, + $crate::TokenTreesReprRef::SpanStorage96($tt) => $body, + } + }; +} +use dispatch_ref; -impl TopSubtree { - pub fn empty(span: DelimSpan) -> Self { - Self(Box::new([TokenTree::Subtree(Subtree { - delimiter: Delimiter::invisible_delim_spanned(span), - len: 0, - })])) - } +#[derive(Clone, Copy)] +enum TokenTreesReprRef<'a> { + SpanStorage32(&'a [crate::storage::TokenTree]), + SpanStorage64(&'a [crate::storage::TokenTree]), + SpanStorage96(&'a [crate::storage::TokenTree]), +} - pub fn invisible_from_leaves(delim_span: Span, leaves: [Leaf; N]) -> Self { - let mut builder = TopSubtreeBuilder::new(Delimiter::invisible_spanned(delim_span)); - builder.extend(leaves); - builder.build() - } - - pub fn from_token_trees(delimiter: Delimiter, token_trees: TokenTreesView<'_>) -> Self { - let mut builder = TopSubtreeBuilder::new(delimiter); - builder.extend_with_tt(token_trees); - builder.build() - } - - pub fn from_serialized(tt: Vec) -> Self { - Self(tt.into_boxed_slice()) - } - - pub fn from_subtree(subtree: SubtreeView<'_>) -> Self { - Self(subtree.0.into()) - } - - pub fn view(&self) -> SubtreeView<'_> { - SubtreeView::new(&self.0) - } - - pub fn iter(&self) -> TtIter<'_> { - self.view().iter() - } - - pub fn top_subtree(&self) -> Subtree { - self.view().top_subtree() - } - - pub fn set_top_subtree_delimiter_kind(&mut self, kind: DelimiterKind) { - self.top_subtree_mut().delimiter.kind = kind; - } - - pub fn set_top_subtree_delimiter_span(&mut self, span: DelimSpan) { - let top_subtree = self.top_subtree_mut(); - top_subtree.delimiter.open = span.open; - top_subtree.delimiter.close = span.close; - } - - fn top_subtree_mut(&mut self) -> &mut Subtree { - let TokenTree::Subtree(subtree) = &mut self.0[0] else { - unreachable!("the first token tree is always the top subtree"); - }; - subtree - } - - pub fn set_token(&mut self, idx: usize, leaf: Leaf) { - assert!(matches!(self.0[idx], TokenTree::Leaf(_)), "cannot replace a subtree by a leaf"); - self.0[idx] = leaf.into(); - } - - pub fn token_trees(&self) -> TokenTreesView<'_> { - self.view().token_trees() - } - - pub fn as_token_trees(&self) -> TokenTreesView<'_> { - self.view().as_token_trees() - } - - pub fn change_every_ast_id(&mut self, mut callback: impl FnMut(&mut span::ErasedFileAstId)) { - for tt in &mut self.0 { - match tt { - TokenTree::Leaf(Leaf::Ident(Ident { span, .. })) - | TokenTree::Leaf(Leaf::Literal(Literal { span, .. })) - | TokenTree::Leaf(Leaf::Punct(Punct { span, .. })) => { - callback(&mut span.anchor.ast_id); - } - TokenTree::Subtree(subtree) => { - callback(&mut subtree.delimiter.open.anchor.ast_id); - callback(&mut subtree.delimiter.close.anchor.ast_id); - } +impl<'a> TokenTreesReprRef<'a> { + #[inline] + fn get(&self, index: I) -> Option + where + I: SliceIndex< + [crate::storage::TokenTree], + Output = [crate::storage::TokenTree], + >, + I: SliceIndex< + [crate::storage::TokenTree], + Output = [crate::storage::TokenTree], + >, + I: SliceIndex< + [crate::storage::TokenTree], + Output = [crate::storage::TokenTree], + >, + { + Some(match self { + TokenTreesReprRef::SpanStorage32(tt) => { + TokenTreesReprRef::SpanStorage32(tt.get(index)?) + } + TokenTreesReprRef::SpanStorage64(tt) => { + TokenTreesReprRef::SpanStorage64(tt.get(index)?) + } + TokenTreesReprRef::SpanStorage96(tt) => { + TokenTreesReprRef::SpanStorage96(tt.get(index)?) } - } - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct TopSubtreeBuilder { - unclosed_subtree_indices: Vec, - token_trees: Vec, - last_closed_subtree: Option, -} - -impl TopSubtreeBuilder { - pub fn new(top_delimiter: Delimiter) -> Self { - let mut result = Self { - unclosed_subtree_indices: Vec::new(), - token_trees: Vec::new(), - last_closed_subtree: None, - }; - let top_subtree = TokenTree::Subtree(Subtree { delimiter: top_delimiter, len: 0 }); - result.token_trees.push(top_subtree); - result - } - - pub fn open(&mut self, delimiter_kind: DelimiterKind, open_span: Span) { - self.unclosed_subtree_indices.push(self.token_trees.len()); - self.token_trees.push(TokenTree::Subtree(Subtree { - delimiter: Delimiter { - open: open_span, - close: open_span, // Will be overwritten on close. - kind: delimiter_kind, - }, - len: 0, - })); - } - - pub fn close(&mut self, close_span: Span) { - let last_unclosed_index = self - .unclosed_subtree_indices - .pop() - .expect("attempt to close a `tt::Subtree` when none is open"); - let subtree_len = (self.token_trees.len() - last_unclosed_index - 1) as u32; - let TokenTree::Subtree(subtree) = &mut self.token_trees[last_unclosed_index] else { - unreachable!("unclosed token tree is always a subtree"); - }; - subtree.len = subtree_len; - subtree.delimiter.close = close_span; - self.last_closed_subtree = Some(last_unclosed_index); - } - - /// You cannot call this consecutively, it will only work once after close. - pub fn remove_last_subtree_if_invisible(&mut self) { - let Some(last_subtree_idx) = self.last_closed_subtree else { return }; - if let TokenTree::Subtree(Subtree { - delimiter: Delimiter { kind: DelimiterKind::Invisible, .. }, - .. - }) = self.token_trees[last_subtree_idx] - { - self.token_trees.remove(last_subtree_idx); - self.last_closed_subtree = None; - } - } - - pub fn push(&mut self, leaf: Leaf) { - self.token_trees.push(TokenTree::Leaf(leaf)); - } - - pub fn extend(&mut self, leaves: impl IntoIterator) { - self.token_trees.extend(leaves.into_iter().map(TokenTree::Leaf)); - } - - pub fn extend_with_tt(&mut self, tt: TokenTreesView<'_>) { - self.token_trees.extend(tt.0.iter().cloned()); - } - - /// Like [`Self::extend_with_tt()`], but makes sure the new tokens will never be - /// joint with whatever comes after them. - pub fn extend_with_tt_alone(&mut self, tt: TokenTreesView<'_>) { - if let Some((last, before_last)) = tt.0.split_last() { - self.token_trees.reserve(tt.0.len()); - self.token_trees.extend(before_last.iter().cloned()); - let last = if let TokenTree::Leaf(Leaf::Punct(last)) = last { - let mut last = *last; - last.spacing = Spacing::Alone; - TokenTree::Leaf(Leaf::Punct(last)) - } else { - last.clone() - }; - self.token_trees.push(last); - } - } - - pub fn expected_delimiters(&self) -> impl Iterator { - self.unclosed_subtree_indices.iter().rev().map(|&subtree_idx| { - let TokenTree::Subtree(subtree) = &self.token_trees[subtree_idx] else { - unreachable!("unclosed token tree is always a subtree") - }; - subtree.delimiter.kind }) } - - /// Builds, and remove the top subtree if it has only one subtree child. - pub fn build_skip_top_subtree(mut self) -> TopSubtree { - let top_tts = TokenTreesView::new(&self.token_trees[1..]); - match top_tts.try_into_subtree() { - Some(_) => { - assert!( - self.unclosed_subtree_indices.is_empty(), - "attempt to build an unbalanced `TopSubtreeBuilder`" - ); - TopSubtree(self.token_trees.drain(1..).collect()) - } - None => self.build(), - } - } - - pub fn build(mut self) -> TopSubtree { - assert!( - self.unclosed_subtree_indices.is_empty(), - "attempt to build an unbalanced `TopSubtreeBuilder`" - ); - let total_len = self.token_trees.len() as u32; - let TokenTree::Subtree(top_subtree) = &mut self.token_trees[0] else { - unreachable!("first token tree is always a subtree"); - }; - top_subtree.len = total_len - 1; - TopSubtree(self.token_trees.into_boxed_slice()) - } - - pub fn restore_point(&self) -> SubtreeBuilderRestorePoint { - SubtreeBuilderRestorePoint { - unclosed_subtree_indices_len: self.unclosed_subtree_indices.len(), - token_trees_len: self.token_trees.len(), - last_closed_subtree: self.last_closed_subtree, - } - } - - pub fn restore(&mut self, restore_point: SubtreeBuilderRestorePoint) { - self.unclosed_subtree_indices.truncate(restore_point.unclosed_subtree_indices_len); - self.token_trees.truncate(restore_point.token_trees_len); - self.last_closed_subtree = restore_point.last_closed_subtree; - } } #[derive(Clone, Copy)] -pub struct SubtreeBuilderRestorePoint { - unclosed_subtree_indices_len: usize, - token_trees_len: usize, - last_closed_subtree: Option, +pub struct TokenTreesView<'a> { + repr: TokenTreesReprRef<'a>, + span_parts: &'a [CompressedSpanPart], } -#[derive(Clone, Copy)] -pub struct TokenTreesView<'a>(&'a [TokenTree]); - impl<'a> TokenTreesView<'a> { - fn new(tts: &'a [TokenTree]) -> Self { - if cfg!(debug_assertions) { - tts.iter().enumerate().for_each(|(idx, tt)| { - if let TokenTree::Subtree(tt) = &tt { - // `<` and not `<=` because `Subtree.len` does not include the subtree node itself. - debug_assert!( - idx + tt.usize_len() < tts.len(), - "`TokenTreeView::new()` was given a cut-in-half list" - ); - } - }); - } - Self(tts) - } - pub fn empty() -> Self { - Self(&[]) + Self { repr: TokenTreesReprRef::SpanStorage32(&[]), span_parts: &[] } } pub fn iter(&self) -> TtIter<'a> { - TtIter::new(self.0) + TtIter::new(*self) } pub fn cursor(&self) -> Cursor<'a> { @@ -393,20 +201,23 @@ impl<'a> TokenTreesView<'a> { } pub fn len(&self) -> usize { - self.0.len() + dispatch_ref! { + match self.repr => tt => tt.len() + } } pub fn is_empty(&self) -> bool { - self.0.is_empty() + self.len() == 0 } pub fn try_into_subtree(self) -> Option> { - if let Some(TokenTree::Subtree(subtree)) = self.0.first() - && subtree.usize_len() == (self.0.len() - 1) - { - return Some(SubtreeView::new(self.0)); - } - None + let is_subtree = dispatch_ref! { + match self.repr => tt => matches!( + tt.first(), + Some(crate::storage::TokenTree::Subtree { len, .. }) if (*len as usize) == (tt.len() - 1) + ) + }; + if is_subtree { Some(SubtreeView(self)) } else { None } } pub fn strip_invisible(self) -> TokenTreesView<'a> { @@ -440,18 +251,23 @@ impl<'a> TokenTreesView<'a> { } pub fn first_span(&self) -> Option { - Some(self.0.first()?.first_span()) - } - - pub fn last_span(&self) -> Option { - Some(match self.0.last()? { - TokenTree::Leaf(it) => *it.span(), - TokenTree::Subtree(it) => it.delimiter.close, + Some(dispatch_ref! { + match self.repr => tt => tt.first()?.first_span().span(self.span_parts) }) } - pub fn iter_flat_tokens(&self) -> impl ExactSizeIterator + use<'a> { - self.0.iter().cloned() + pub fn last_span(&self) -> Option { + Some(dispatch_ref! { + match self.repr => tt => tt.last()?.last_span().span(self.span_parts) + }) + } + + pub fn iter_flat_tokens(self) -> impl ExactSizeIterator + use<'a> { + (0..self.len()).map(move |idx| { + dispatch_ref! { + match self.repr => tt => tt[idx].to_api(self.span_parts) + } + }) } } @@ -515,60 +331,70 @@ impl fmt::Display for TokenTreesView<'_> { #[derive(Clone, Copy)] // Invariant: always starts with `Subtree` that covers the entire thing. -pub struct SubtreeView<'a>(&'a [TokenTree]); +pub struct SubtreeView<'a>(TokenTreesView<'a>); impl<'a> SubtreeView<'a> { - pub fn new(tts: &'a [TokenTree]) -> Self { - if cfg!(debug_assertions) { - let TokenTree::Subtree(subtree) = &tts[0] else { - panic!("first token tree must be a subtree in `SubtreeView`"); - }; - assert_eq!( - subtree.usize_len(), - tts.len() - 1, - "subtree must cover the entire `SubtreeView`" - ); - } - Self(tts) - } - pub fn as_token_trees(self) -> TokenTreesView<'a> { - TokenTreesView::new(self.0) + self.0 } pub fn iter(&self) -> TtIter<'a> { - TtIter::new(&self.0[1..]) + self.token_trees().iter() } pub fn top_subtree(&self) -> Subtree { - let TokenTree::Subtree(subtree) = &self.0[0] else { - unreachable!("the first token tree is always the top subtree"); - }; - *subtree + dispatch_ref! { + match self.0.repr => tt => { + let crate::storage::TokenTree::Subtree { len, delim_kind, open_span, close_span } = + &tt[0] + else { + unreachable!("the first token tree is always the top subtree"); + }; + Subtree { + delimiter: Delimiter { + open: open_span.span(self.0.span_parts), + close: close_span.span(self.0.span_parts), + kind: *delim_kind, + }, + len: *len, + } + } + } } pub fn strip_invisible(&self) -> TokenTreesView<'a> { if self.top_subtree().delimiter.kind == DelimiterKind::Invisible { - TokenTreesView::new(&self.0[1..]) + self.token_trees() } else { - TokenTreesView::new(self.0) + self.0 } } pub fn token_trees(&self) -> TokenTreesView<'a> { - TokenTreesView::new(&self.0[1..]) + let repr = match self.0.repr { + TokenTreesReprRef::SpanStorage32(token_trees) => { + TokenTreesReprRef::SpanStorage32(&token_trees[1..]) + } + TokenTreesReprRef::SpanStorage64(token_trees) => { + TokenTreesReprRef::SpanStorage64(&token_trees[1..]) + } + TokenTreesReprRef::SpanStorage96(token_trees) => { + TokenTreesReprRef::SpanStorage96(&token_trees[1..]) + } + }; + TokenTreesView { repr, ..self.0 } } } impl fmt::Debug for SubtreeView<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(&TokenTreesView(self.0), f) + fmt::Debug::fmt(&self.0, f) } } impl fmt::Display for SubtreeView<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(&TokenTreesView(self.0), f) + fmt::Display::fmt(&self.0, f) } } @@ -937,91 +763,40 @@ impl Subtree { } } -impl TopSubtree { - /// A simple line string used for debugging - pub fn subtree_as_debug_string(&self, subtree_idx: usize) -> String { - fn debug_subtree( - output: &mut String, - subtree: &Subtree, - iter: &mut std::slice::Iter<'_, TokenTree>, - ) { - let delim = match subtree.delimiter.kind { - DelimiterKind::Brace => ("{", "}"), - DelimiterKind::Bracket => ("[", "]"), - DelimiterKind::Parenthesis => ("(", ")"), - DelimiterKind::Invisible => ("$", "$"), - }; - - output.push_str(delim.0); - let mut last = None; - let mut idx = 0; - while idx < subtree.len { - let child = iter.next().unwrap(); - debug_token_tree(output, child, last, iter); - last = Some(child); - idx += 1; - } - - output.push_str(delim.1); - } - - fn debug_token_tree( - output: &mut String, - tt: &TokenTree, - last: Option<&TokenTree>, - iter: &mut std::slice::Iter<'_, TokenTree>, - ) { - match tt { - TokenTree::Leaf(it) => { - let s = match it { - Leaf::Literal(it) => it.text().to_owned(), - Leaf::Punct(it) => it.char.to_string(), - Leaf::Ident(it) => format!("{}{}", it.is_raw.as_str(), it.sym), - }; - match (it, last) { - (Leaf::Ident(_), Some(&TokenTree::Leaf(Leaf::Ident(_)))) => { - output.push(' '); - output.push_str(&s); - } - (Leaf::Punct(_), Some(TokenTree::Leaf(Leaf::Punct(punct)))) => { - if punct.spacing == Spacing::Alone { - output.push(' '); - output.push_str(&s); - } else { - output.push_str(&s); - } - } - _ => output.push_str(&s), - } - } - TokenTree::Subtree(it) => debug_subtree(output, it, iter), - } - } - - let mut res = String::new(); - debug_token_tree( - &mut res, - &self.0[subtree_idx], - None, - &mut self.0[subtree_idx + 1..].iter(), - ); - res - } -} - pub fn pretty(tkns: TokenTreesView<'_>) -> String { - fn tokentree_to_text(tkn: &TokenTree, tkns: &mut &[TokenTree]) -> String { + return dispatch_ref! { + match tkns.repr => tt => pretty_impl(tt) + }; + + use crate::storage::TokenTree; + + fn tokentree_to_text(tkn: &TokenTree, tkns: &mut &[TokenTree]) -> String { match tkn { - TokenTree::Leaf(Leaf::Ident(ident)) => { - format!("{}{}", ident.is_raw.as_str(), ident.sym) + TokenTree::Ident { sym, is_raw, .. } => format!("{}{}", is_raw.as_str(), sym), + &TokenTree::Literal { ref text_and_suffix, kind, suffix_len, span: _ } => { + format!( + "{}", + Literal { + text_and_suffix: text_and_suffix.clone(), + span: Span { + range: TextRange::empty(TextSize::new(0)), + anchor: span::SpanAnchor { + file_id: span::EditionedFileId::from_raw(0), + ast_id: span::FIXUP_ERASED_FILE_AST_ID_MARKER + }, + ctx: span::SyntaxContext::root(span::Edition::Edition2015) + }, + kind, + suffix_len + } + ) } - TokenTree::Leaf(Leaf::Literal(literal)) => format!("{literal}"), - TokenTree::Leaf(Leaf::Punct(punct)) => format!("{}", punct.char), - TokenTree::Subtree(subtree) => { - let (subtree_content, rest) = tkns.split_at(subtree.usize_len()); - let content = pretty(TokenTreesView(subtree_content)); + TokenTree::Punct { char, .. } => format!("{}", char), + TokenTree::Subtree { len, delim_kind, .. } => { + let (subtree_content, rest) = tkns.split_at(*len as usize); + let content = pretty_impl(subtree_content); *tkns = rest; - let (open, close) = match subtree.delimiter.kind { + let (open, close) = match *delim_kind { DelimiterKind::Brace => ("{", "}"), DelimiterKind::Bracket => ("[", "]"), DelimiterKind::Parenthesis => ("(", ")"), @@ -1032,21 +807,26 @@ pub fn pretty(tkns: TokenTreesView<'_>) -> String { } } - let mut tkns = tkns.0; - let mut last = String::new(); - let mut last_to_joint = true; + fn pretty_impl(mut tkns: &[TokenTree]) -> String { + let mut last = String::new(); + let mut last_to_joint = true; - while let Some((tkn, rest)) = tkns.split_first() { - tkns = rest; - last = [last, tokentree_to_text(tkn, &mut tkns)].join(if last_to_joint { "" } else { " " }); - last_to_joint = false; - if let TokenTree::Leaf(Leaf::Punct(punct)) = tkn - && punct.spacing == Spacing::Joint - { - last_to_joint = true; + while let Some((tkn, rest)) = tkns.split_first() { + tkns = rest; + last = [last, tokentree_to_text(tkn, &mut tkns)].join(if last_to_joint { + "" + } else { + " " + }); + last_to_joint = false; + if let TokenTree::Punct { spacing, .. } = tkn + && *spacing == Spacing::Joint + { + last_to_joint = true; + } } + last } - last } #[derive(Debug)] @@ -1069,7 +849,7 @@ pub fn transform_tt<'b>( tt: &mut TopSubtree, mut callback: impl FnMut(TokenTree) -> TransformTtAction<'b>, ) { - let mut tt_vec = std::mem::take(&mut tt.0).into_vec(); + let mut tt_vec = tt.as_token_trees().iter_flat_tokens().collect::>(); // We need to keep a stack of the currently open subtrees, because we need to update // them if we change the number of items in them. @@ -1112,7 +892,7 @@ pub fn transform_tt<'b>( TokenTree::Subtree(subtree) => subtree.usize_len(), }; let len_diff = replacement.len() as i64 - old_len as i64; - tt_vec.splice(i..i + old_len, replacement.0.iter().cloned()); + tt_vec.splice(i..i + old_len, replacement.iter_flat_tokens()); // Skip the newly inserted replacement, we don't want to visit it. i += replacement.len(); @@ -1126,5 +906,5 @@ pub fn transform_tt<'b>( } } - tt.0 = tt_vec.into_boxed_slice(); + *tt = TopSubtree::from_serialized(tt_vec); } diff --git a/crates/tt/src/storage.rs b/crates/tt/src/storage.rs new file mode 100644 index 0000000000..62d2e20016 --- /dev/null +++ b/crates/tt/src/storage.rs @@ -0,0 +1,992 @@ +//! Spans are memory heavy, and we have a lot of token trees. Storing them straight +//! will waste a lot of memory. So instead we implement a clever compression mechanism: +//! +//! A `TopSubtree` has a list of [`CompressedSpanPart`], which are the parts of a span +//! that tend to be shared between tokens - namely, without the range. The main list +//! of token trees is kept in one of three versions, where we use the smallest version +//! we can for this tree: +//! +//! 1. In the most common version a span is just a `u32`. The bits are divided as follows: +//! there are 4 bits that index into the [`CompressedSpanPart`] list. 20 bits +//! store the range start, and 8 bits store the range length. In experiments, +//! this accounts for 75%-85% of the spans. +//! 2. In the second version a span is 64 bits. 32 bits for the range start, 16 bits +//! for the range length, and 16 bits for the span parts index. This is used in +//! less than 2% of all `TopSubtree`s, but they account for 15%-25% of the spans: +//! those are mostly token tree munchers, that generate a lot of `SyntaxContext`s +//! (because they recurse a lot), which is why they can't fit in the first version, +//! and tend to generate a lot of code. +//! 3. The third version is practically unused; 65,535 bytes for a token and 65,535 +//! unique span parts is more than enough for everybody. However, someone may still +//! create a macro that requires more, therefore we have this version as a backup: +//! it uses 96 bits, 32 for each of the range start, length and span parts index. + +use std::fmt; + +use intern::Symbol; +use rustc_hash::FxBuildHasher; +use span::{Span, SpanAnchor, SyntaxContext, TextRange, TextSize}; + +use crate::{ + DelimSpan, DelimiterKind, IdentIsRaw, LitKind, Spacing, SubtreeView, TokenTreesReprRef, + TokenTreesView, TtIter, dispatch_ref, +}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) struct CompressedSpanPart { + pub(crate) anchor: SpanAnchor, + pub(crate) ctx: SyntaxContext, +} + +impl CompressedSpanPart { + #[inline] + fn from_span(span: &Span) -> Self { + Self { anchor: span.anchor, ctx: span.ctx } + } + + #[inline] + fn recombine(&self, range: TextRange) -> Span { + Span { range, anchor: self.anchor, ctx: self.ctx } + } +} + +pub(crate) trait SpanStorage: Copy { + fn can_hold(text_range: TextRange, span_parts_index: usize) -> bool; + + fn new(text_range: TextRange, span_parts_index: usize) -> Self; + + fn text_range(&self) -> TextRange; + + fn span_parts_index(&self) -> usize; + + #[inline] + fn span(&self, span_parts: &[CompressedSpanPart]) -> Span { + span_parts[self.span_parts_index()].recombine(self.text_range()) + } +} + +#[inline] +const fn n_bits_mask(n: u32) -> u32 { + (1 << n) - 1 +} + +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) struct SpanStorage32(u32); + +impl SpanStorage32 { + const SPAN_PARTS_BIT: u32 = 4; + const LEN_BITS: u32 = 8; + const OFFSET_BITS: u32 = 20; +} + +const _: () = assert!( + (SpanStorage32::SPAN_PARTS_BIT + SpanStorage32::LEN_BITS + SpanStorage32::OFFSET_BITS) + == u32::BITS +); + +impl SpanStorage for SpanStorage32 { + #[inline] + fn can_hold(text_range: TextRange, span_parts_index: usize) -> bool { + let offset = u32::from(text_range.start()); + let len = u32::from(text_range.len()); + let span_parts_index = span_parts_index as u32; + + offset <= n_bits_mask(Self::OFFSET_BITS) + && len <= n_bits_mask(Self::LEN_BITS) + && span_parts_index <= n_bits_mask(Self::SPAN_PARTS_BIT) + } + + #[inline] + fn new(text_range: TextRange, span_parts_index: usize) -> Self { + let offset = u32::from(text_range.start()); + let len = u32::from(text_range.len()); + let span_parts_index = span_parts_index as u32; + + debug_assert!(offset <= n_bits_mask(Self::OFFSET_BITS)); + debug_assert!(len <= n_bits_mask(Self::LEN_BITS)); + debug_assert!(span_parts_index <= n_bits_mask(Self::SPAN_PARTS_BIT)); + + Self( + (offset << (Self::LEN_BITS + Self::SPAN_PARTS_BIT)) + | (len << Self::SPAN_PARTS_BIT) + | span_parts_index, + ) + } + + #[inline] + fn text_range(&self) -> TextRange { + let offset = TextSize::new(self.0 >> (Self::SPAN_PARTS_BIT + Self::LEN_BITS)); + let len = TextSize::new((self.0 >> Self::SPAN_PARTS_BIT) & n_bits_mask(Self::LEN_BITS)); + TextRange::at(offset, len) + } + + #[inline] + fn span_parts_index(&self) -> usize { + (self.0 & n_bits_mask(Self::SPAN_PARTS_BIT)) as usize + } +} + +impl fmt::Debug for SpanStorage32 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SpanStorage32") + .field("text_range", &self.text_range()) + .field("span_parts_index", &self.span_parts_index()) + .finish() + } +} + +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) struct SpanStorage64 { + offset: u32, + len_and_parts: u32, +} + +impl SpanStorage64 { + const SPAN_PARTS_BIT: u32 = 16; + const LEN_BITS: u32 = 16; +} + +const _: () = assert!((SpanStorage64::SPAN_PARTS_BIT + SpanStorage64::LEN_BITS) == u32::BITS); + +impl SpanStorage for SpanStorage64 { + #[inline] + fn can_hold(text_range: TextRange, span_parts_index: usize) -> bool { + let len = u32::from(text_range.len()); + let span_parts_index = span_parts_index as u32; + + len <= n_bits_mask(Self::LEN_BITS) && span_parts_index <= n_bits_mask(Self::SPAN_PARTS_BIT) + } + + #[inline] + fn new(text_range: TextRange, span_parts_index: usize) -> Self { + let offset = u32::from(text_range.start()); + let len = u32::from(text_range.len()); + let span_parts_index = span_parts_index as u32; + + debug_assert!(len <= n_bits_mask(Self::LEN_BITS)); + debug_assert!(span_parts_index <= n_bits_mask(Self::SPAN_PARTS_BIT)); + + Self { offset, len_and_parts: (len << Self::SPAN_PARTS_BIT) | span_parts_index } + } + + #[inline] + fn text_range(&self) -> TextRange { + let offset = TextSize::new(self.offset); + let len = TextSize::new(self.len_and_parts >> Self::SPAN_PARTS_BIT); + TextRange::at(offset, len) + } + + #[inline] + fn span_parts_index(&self) -> usize { + (self.len_and_parts & n_bits_mask(Self::SPAN_PARTS_BIT)) as usize + } +} + +impl fmt::Debug for SpanStorage64 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SpanStorage64") + .field("text_range", &self.text_range()) + .field("span_parts_index", &self.span_parts_index()) + .finish() + } +} + +impl From for SpanStorage64 { + #[inline] + fn from(value: SpanStorage32) -> Self { + SpanStorage64::new(value.text_range(), value.span_parts_index()) + } +} + +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) struct SpanStorage96 { + offset: u32, + len: u32, + parts: u32, +} + +impl SpanStorage for SpanStorage96 { + #[inline] + fn can_hold(_text_range: TextRange, _span_parts_index: usize) -> bool { + true + } + + #[inline] + fn new(text_range: TextRange, span_parts_index: usize) -> Self { + let offset = u32::from(text_range.start()); + let len = u32::from(text_range.len()); + let span_parts_index = span_parts_index as u32; + + Self { offset, len, parts: span_parts_index } + } + + #[inline] + fn text_range(&self) -> TextRange { + let offset = TextSize::new(self.offset); + let len = TextSize::new(self.len); + TextRange::at(offset, len) + } + + #[inline] + fn span_parts_index(&self) -> usize { + self.parts as usize + } +} + +impl fmt::Debug for SpanStorage96 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SpanStorage96") + .field("text_range", &self.text_range()) + .field("span_parts_index", &self.span_parts_index()) + .finish() + } +} + +impl From for SpanStorage96 { + #[inline] + fn from(value: SpanStorage32) -> Self { + SpanStorage96::new(value.text_range(), value.span_parts_index()) + } +} + +impl From for SpanStorage96 { + #[inline] + fn from(value: SpanStorage64) -> Self { + SpanStorage96::new(value.text_range(), value.span_parts_index()) + } +} + +// We don't use structs or enum nesting here to save padding. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) enum TokenTree { + Literal { text_and_suffix: Symbol, span: S, kind: LitKind, suffix_len: u8 }, + Punct { char: char, spacing: Spacing, span: S }, + Ident { sym: Symbol, span: S, is_raw: IdentIsRaw }, + Subtree { len: u32, delim_kind: DelimiterKind, open_span: S, close_span: S }, +} + +impl TokenTree { + #[inline] + pub(crate) fn first_span(&self) -> &S { + match self { + TokenTree::Literal { span, .. } => span, + TokenTree::Punct { span, .. } => span, + TokenTree::Ident { span, .. } => span, + TokenTree::Subtree { open_span, .. } => open_span, + } + } + + #[inline] + pub(crate) fn last_span(&self) -> &S { + match self { + TokenTree::Literal { span, .. } => span, + TokenTree::Punct { span, .. } => span, + TokenTree::Ident { span, .. } => span, + TokenTree::Subtree { close_span, .. } => close_span, + } + } + + #[inline] + pub(crate) fn to_api(&self, span_parts: &[CompressedSpanPart]) -> crate::TokenTree { + match self { + TokenTree::Literal { text_and_suffix, span, kind, suffix_len } => { + crate::TokenTree::Leaf(crate::Leaf::Literal(crate::Literal { + text_and_suffix: text_and_suffix.clone(), + span: span.span(span_parts), + kind: *kind, + suffix_len: *suffix_len, + })) + } + TokenTree::Punct { char, spacing, span } => { + crate::TokenTree::Leaf(crate::Leaf::Punct(crate::Punct { + char: *char, + spacing: *spacing, + span: span.span(span_parts), + })) + } + TokenTree::Ident { sym, span, is_raw } => { + crate::TokenTree::Leaf(crate::Leaf::Ident(crate::Ident { + sym: sym.clone(), + span: span.span(span_parts), + is_raw: *is_raw, + })) + } + TokenTree::Subtree { len, delim_kind, open_span, close_span } => { + crate::TokenTree::Subtree(crate::Subtree { + delimiter: crate::Delimiter { + open: open_span.span(span_parts), + close: close_span.span(span_parts), + kind: *delim_kind, + }, + len: *len, + }) + } + } + } + + #[inline] + fn convert>(self) -> TokenTree { + match self { + TokenTree::Literal { text_and_suffix, span, kind, suffix_len } => { + TokenTree::Literal { text_and_suffix, span: span.into(), kind, suffix_len } + } + TokenTree::Punct { char, spacing, span } => { + TokenTree::Punct { char, spacing, span: span.into() } + } + TokenTree::Ident { sym, span, is_raw } => { + TokenTree::Ident { sym, span: span.into(), is_raw } + } + TokenTree::Subtree { len, delim_kind, open_span, close_span } => TokenTree::Subtree { + len, + delim_kind, + open_span: open_span.into(), + close_span: close_span.into(), + }, + } + } +} + +// This is used a lot, make sure it doesn't grow unintentionally. +const _: () = { + assert!(size_of::>() == 16); + assert!(size_of::>() == 24); + assert!(size_of::>() == 32); +}; + +/// `dispatch! {}` +macro_rules! dispatch { + ( + match $scrutinee:expr => $tt:ident => $body:expr + ) => { + match $scrutinee { + TopSubtreeRepr::SpanStorage32($tt) => $body, + TopSubtreeRepr::SpanStorage64($tt) => $body, + TopSubtreeRepr::SpanStorage96($tt) => $body, + } + }; +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) enum TopSubtreeRepr { + SpanStorage32(Box<[TokenTree]>), + SpanStorage64(Box<[TokenTree]>), + SpanStorage96(Box<[TokenTree]>), +} + +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct TopSubtree { + repr: TopSubtreeRepr, + span_parts: Box<[CompressedSpanPart]>, +} + +impl TopSubtree { + pub fn empty(span: DelimSpan) -> Self { + Self { + repr: TopSubtreeRepr::SpanStorage96(Box::new([TokenTree::Subtree { + len: 0, + delim_kind: DelimiterKind::Invisible, + open_span: SpanStorage96::new(span.open.range, 0), + close_span: SpanStorage96::new(span.close.range, 1), + }])), + span_parts: Box::new([ + CompressedSpanPart::from_span(&span.open), + CompressedSpanPart::from_span(&span.close), + ]), + } + } + + pub fn invisible_from_leaves( + delim_span: Span, + leaves: [crate::Leaf; N], + ) -> Self { + let mut builder = TopSubtreeBuilder::new(crate::Delimiter::invisible_spanned(delim_span)); + builder.extend(leaves); + builder.build() + } + + pub fn from_token_trees(delimiter: crate::Delimiter, token_trees: TokenTreesView<'_>) -> Self { + let mut builder = TopSubtreeBuilder::new(delimiter); + builder.extend_with_tt(token_trees); + builder.build() + } + + pub fn from_serialized(tt: Vec) -> Self { + let mut tt = tt.into_iter(); + let Some(crate::TokenTree::Subtree(top_subtree)) = tt.next() else { + panic!("first must always come the top subtree") + }; + let mut builder = TopSubtreeBuilder::new(top_subtree.delimiter); + for tt in tt { + builder.push_token_tree(tt); + } + builder.build() + } + + pub fn from_subtree(subtree: SubtreeView<'_>) -> Self { + let mut builder = TopSubtreeBuilder::new(subtree.top_subtree().delimiter); + builder.extend_with_tt(subtree.token_trees()); + builder.build() + } + + pub fn view(&self) -> SubtreeView<'_> { + let repr = match &self.repr { + TopSubtreeRepr::SpanStorage32(token_trees) => { + TokenTreesReprRef::SpanStorage32(token_trees) + } + TopSubtreeRepr::SpanStorage64(token_trees) => { + TokenTreesReprRef::SpanStorage64(token_trees) + } + TopSubtreeRepr::SpanStorage96(token_trees) => { + TokenTreesReprRef::SpanStorage96(token_trees) + } + }; + SubtreeView(TokenTreesView { repr, span_parts: &self.span_parts }) + } + + pub fn iter(&self) -> TtIter<'_> { + self.view().iter() + } + + pub fn top_subtree(&self) -> crate::Subtree { + self.view().top_subtree() + } + + pub fn set_top_subtree_delimiter_kind(&mut self, kind: DelimiterKind) { + dispatch! { + match &mut self.repr => tt => { + let TokenTree::Subtree { delim_kind, .. } = &mut tt[0] else { + unreachable!("the first token tree is always the top subtree"); + }; + *delim_kind = kind; + } + } + } + + fn ensure_can_hold(&mut self, range: TextRange) { + fn can_hold(_: &[TokenTree], range: TextRange) -> bool { + S::can_hold(range, 0) + } + let can_hold = dispatch! { + match &self.repr => tt => can_hold(tt, range) + }; + if can_hold { + return; + } + + // Otherwise, we do something very junky: recreate the entire tree. Hopefully this should be rare. + let mut builder = TopSubtreeBuilder::new(self.top_subtree().delimiter); + builder.extend_with_tt(self.token_trees()); + builder.ensure_can_hold(range, 0); + *self = builder.build(); + } + + pub fn set_top_subtree_delimiter_span(&mut self, span: DelimSpan) { + self.ensure_can_hold(span.open.range); + self.ensure_can_hold(span.close.range); + fn do_it(tt: &mut [TokenTree], span: DelimSpan) { + let TokenTree::Subtree { open_span, close_span, .. } = &mut tt[0] else { + unreachable!() + }; + *open_span = S::new(span.open.range, 0); + *close_span = S::new(span.close.range, 0); + } + dispatch! { + match &mut self.repr => tt => do_it(tt, span) + } + self.span_parts[0] = CompressedSpanPart::from_span(&span.open); + self.span_parts[1] = CompressedSpanPart::from_span(&span.close); + } + + /// Note: this cannot change spans. + pub fn set_token(&mut self, idx: usize, leaf: crate::Leaf) { + fn do_it( + tt: &mut [TokenTree], + idx: usize, + span_parts: &[CompressedSpanPart], + leaf: crate::Leaf, + ) { + assert!( + !matches!(tt[idx], TokenTree::Subtree { .. }), + "`TopSubtree::set_token()` must be called on a leaf" + ); + let existing_span_compressed = *tt[idx].first_span(); + let existing_span = existing_span_compressed.span(span_parts); + assert_eq!( + *leaf.span(), + existing_span, + "`TopSubtree::set_token()` cannot change spans" + ); + match leaf { + crate::Leaf::Literal(leaf) => { + tt[idx] = TokenTree::Literal { + text_and_suffix: leaf.text_and_suffix, + span: existing_span_compressed, + kind: leaf.kind, + suffix_len: leaf.suffix_len, + } + } + crate::Leaf::Punct(leaf) => { + tt[idx] = TokenTree::Punct { + char: leaf.char, + spacing: leaf.spacing, + span: existing_span_compressed, + } + } + crate::Leaf::Ident(leaf) => { + tt[idx] = TokenTree::Ident { + sym: leaf.sym, + span: existing_span_compressed, + is_raw: leaf.is_raw, + } + } + } + } + dispatch! { + match &mut self.repr => tt => do_it(tt, idx, &self.span_parts, leaf) + } + } + + pub fn token_trees(&self) -> TokenTreesView<'_> { + self.view().token_trees() + } + + pub fn as_token_trees(&self) -> TokenTreesView<'_> { + self.view().as_token_trees() + } + + pub fn change_every_ast_id(&mut self, mut callback: impl FnMut(&mut span::ErasedFileAstId)) { + for span_part in &mut self.span_parts { + callback(&mut span_part.anchor.ast_id); + } + } +} + +/// `dispatch_builder! {}` +macro_rules! dispatch_builder { + ( + match $scrutinee:expr => $tt:ident => $body:expr + ) => { + match $scrutinee { + TopSubtreeBuilderRepr::SpanStorage32($tt) => $body, + TopSubtreeBuilderRepr::SpanStorage64($tt) => $body, + TopSubtreeBuilderRepr::SpanStorage96($tt) => $body, + } + }; +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +enum TopSubtreeBuilderRepr { + SpanStorage32(Vec>), + SpanStorage64(Vec>), + SpanStorage96(Vec>), +} + +type FxIndexSet = indexmap::IndexSet; + +/// In any tree, the first two subtree parts are reserved for the top subtree. +/// +/// We do it because `TopSubtree` exposes an API to modify the top subtree, therefore it's more convenient +/// this way, and it's unlikely to affect memory usage. +const RESERVED_SPAN_PARTS_LEN: usize = 2; + +#[derive(Debug, Clone)] +pub struct TopSubtreeBuilder { + unclosed_subtree_indices: Vec, + token_trees: TopSubtreeBuilderRepr, + span_parts: FxIndexSet, + last_closed_subtree: Option, + /// We need to keep those because they are not inside `span_parts`, see [`RESERVED_SPAN_PARTS_LEN`]. + top_subtree_spans: DelimSpan, +} + +impl TopSubtreeBuilder { + pub fn new(top_delimiter: crate::Delimiter) -> Self { + let mut result = Self { + unclosed_subtree_indices: Vec::new(), + token_trees: TopSubtreeBuilderRepr::SpanStorage32(Vec::new()), + span_parts: FxIndexSet::default(), + last_closed_subtree: None, + top_subtree_spans: top_delimiter.delim_span(), + }; + result.ensure_can_hold(top_delimiter.open.range, 0); + result.ensure_can_hold(top_delimiter.close.range, 1); + fn push_first(tt: &mut Vec>, top_delimiter: crate::Delimiter) { + tt.push(TokenTree::Subtree { + len: 0, + delim_kind: top_delimiter.kind, + open_span: S::new(top_delimiter.open.range, 0), + close_span: S::new(top_delimiter.close.range, 1), + }); + } + dispatch_builder! { + match &mut result.token_trees => tt => push_first(tt, top_delimiter) + } + result + } + + fn span_part_index(&mut self, part: CompressedSpanPart) -> usize { + self.span_parts.insert_full(part).0 + RESERVED_SPAN_PARTS_LEN + } + + fn switch_repr>(repr: &mut Vec>) -> Vec> { + let repr = std::mem::take(repr); + repr.into_iter().map(|tt| tt.convert()).collect() + } + + /// Ensures we have a representation that can hold these values. + fn ensure_can_hold(&mut self, text_range: TextRange, span_parts_index: usize) { + match &mut self.token_trees { + TopSubtreeBuilderRepr::SpanStorage32(token_trees) => { + if SpanStorage32::can_hold(text_range, span_parts_index) { + // Can hold. + } else if SpanStorage64::can_hold(text_range, span_parts_index) { + self.token_trees = + TopSubtreeBuilderRepr::SpanStorage64(Self::switch_repr(token_trees)); + } else { + self.token_trees = + TopSubtreeBuilderRepr::SpanStorage96(Self::switch_repr(token_trees)); + } + } + TopSubtreeBuilderRepr::SpanStorage64(token_trees) => { + if SpanStorage64::can_hold(text_range, span_parts_index) { + // Can hold. + } else { + self.token_trees = + TopSubtreeBuilderRepr::SpanStorage96(Self::switch_repr(token_trees)); + } + } + TopSubtreeBuilderRepr::SpanStorage96(_) => { + // Can hold anything. + } + } + } + + /// Not to be exposed, this assumes the subtree's children will be filled in immediately. + fn push_subtree(&mut self, subtree: crate::Subtree) { + let open_span_parts_index = + self.span_part_index(CompressedSpanPart::from_span(&subtree.delimiter.open)); + self.ensure_can_hold(subtree.delimiter.open.range, open_span_parts_index); + let close_span_parts_index = + self.span_part_index(CompressedSpanPart::from_span(&subtree.delimiter.close)); + self.ensure_can_hold(subtree.delimiter.close.range, close_span_parts_index); + fn do_it( + tt: &mut Vec>, + open_span_parts_index: usize, + close_span_parts_index: usize, + subtree: crate::Subtree, + ) { + let open_span = S::new(subtree.delimiter.open.range, open_span_parts_index); + let close_span = S::new(subtree.delimiter.close.range, close_span_parts_index); + tt.push(TokenTree::Subtree { + len: subtree.len, + delim_kind: subtree.delimiter.kind, + open_span, + close_span, + }); + } + dispatch_builder! { + match &mut self.token_trees => tt => do_it(tt, open_span_parts_index, close_span_parts_index, subtree) + } + } + + pub fn open(&mut self, delimiter_kind: DelimiterKind, open_span: Span) { + let span_parts_index = self.span_part_index(CompressedSpanPart::from_span(&open_span)); + self.ensure_can_hold(open_span.range, span_parts_index); + fn do_it( + token_trees: &mut Vec>, + delimiter_kind: DelimiterKind, + range: TextRange, + span_parts_index: usize, + ) -> usize { + let open_span = S::new(range, span_parts_index); + token_trees.push(TokenTree::Subtree { + len: 0, + delim_kind: delimiter_kind, + open_span, + close_span: open_span, // Will be overwritten on close. + }); + token_trees.len() - 1 + } + let subtree_idx = dispatch_builder! { + match &mut self.token_trees => tt => do_it(tt, delimiter_kind, open_span.range, span_parts_index) + }; + self.unclosed_subtree_indices.push(subtree_idx); + } + + pub fn close(&mut self, close_span: Span) { + let span_parts_index = self.span_part_index(CompressedSpanPart::from_span(&close_span)); + let range = close_span.range; + self.ensure_can_hold(range, span_parts_index); + + let last_unclosed_index = self + .unclosed_subtree_indices + .pop() + .expect("attempt to close a `tt::Subtree` when none is open"); + fn do_it( + token_trees: &mut [TokenTree], + last_unclosed_index: usize, + range: TextRange, + span_parts_index: usize, + ) { + let token_trees_len = token_trees.len(); + let TokenTree::Subtree { len, delim_kind: _, open_span: _, close_span } = + &mut token_trees[last_unclosed_index] + else { + unreachable!("unclosed token tree is always a subtree"); + }; + *len = (token_trees_len - last_unclosed_index - 1) as u32; + *close_span = S::new(range, span_parts_index); + } + dispatch_builder! { + match &mut self.token_trees => tt => do_it(tt, last_unclosed_index, range, span_parts_index) + } + self.last_closed_subtree = Some(last_unclosed_index); + } + + /// You cannot call this consecutively, it will only work once after close. + pub fn remove_last_subtree_if_invisible(&mut self) { + let Some(last_subtree_idx) = self.last_closed_subtree else { return }; + fn do_it(tt: &mut Vec>, last_subtree_idx: usize) { + if let TokenTree::Subtree { delim_kind: DelimiterKind::Invisible, .. } = + tt[last_subtree_idx] + { + tt.remove(last_subtree_idx); + } + } + dispatch_builder! { + match &mut self.token_trees => tt => do_it(tt, last_subtree_idx) + } + self.last_closed_subtree = None; + } + + fn push_literal(&mut self, leaf: crate::Literal) { + let span_parts_index = self.span_part_index(CompressedSpanPart::from_span(&leaf.span)); + let range = leaf.span.range; + self.ensure_can_hold(range, span_parts_index); + fn do_it( + tt: &mut Vec>, + range: TextRange, + span_parts_index: usize, + leaf: crate::Literal, + ) { + tt.push(TokenTree::Literal { + text_and_suffix: leaf.text_and_suffix, + span: S::new(range, span_parts_index), + kind: leaf.kind, + suffix_len: leaf.suffix_len, + }) + } + dispatch_builder! { + match &mut self.token_trees => tt => do_it(tt, range, span_parts_index, leaf) + } + } + + fn push_punct(&mut self, leaf: crate::Punct) { + let span_parts_index = self.span_part_index(CompressedSpanPart::from_span(&leaf.span)); + let range = leaf.span.range; + self.ensure_can_hold(range, span_parts_index); + fn do_it( + tt: &mut Vec>, + range: TextRange, + span_parts_index: usize, + leaf: crate::Punct, + ) { + tt.push(TokenTree::Punct { + char: leaf.char, + spacing: leaf.spacing, + span: S::new(range, span_parts_index), + }) + } + dispatch_builder! { + match &mut self.token_trees => tt => do_it(tt, range, span_parts_index, leaf) + } + } + + fn push_ident(&mut self, leaf: crate::Ident) { + let span_parts_index = self.span_part_index(CompressedSpanPart::from_span(&leaf.span)); + let range = leaf.span.range; + self.ensure_can_hold(range, span_parts_index); + fn do_it( + tt: &mut Vec>, + range: TextRange, + span_parts_index: usize, + leaf: crate::Ident, + ) { + tt.push(TokenTree::Ident { + sym: leaf.sym, + span: S::new(range, span_parts_index), + is_raw: leaf.is_raw, + }) + } + dispatch_builder! { + match &mut self.token_trees => tt => do_it(tt, range, span_parts_index, leaf) + } + } + + pub fn push(&mut self, leaf: crate::Leaf) { + match leaf { + crate::Leaf::Literal(leaf) => self.push_literal(leaf), + crate::Leaf::Punct(leaf) => self.push_punct(leaf), + crate::Leaf::Ident(leaf) => self.push_ident(leaf), + } + } + + fn push_token_tree(&mut self, tt: crate::TokenTree) { + match tt { + crate::TokenTree::Leaf(leaf) => self.push(leaf), + crate::TokenTree::Subtree(subtree) => self.push_subtree(subtree), + } + } + + pub fn extend(&mut self, leaves: impl IntoIterator) { + leaves.into_iter().for_each(|leaf| self.push(leaf)); + } + + pub fn extend_with_tt(&mut self, tt: TokenTreesView<'_>) { + fn do_it( + this: &mut TopSubtreeBuilder, + tt: &[TokenTree], + span_parts: &[CompressedSpanPart], + ) { + for tt in tt { + this.push_token_tree(tt.to_api(span_parts)); + } + } + dispatch_ref! { + match tt.repr => tt_repr => do_it(self, tt_repr, tt.span_parts) + } + } + + /// Like [`Self::extend_with_tt()`], but makes sure the new tokens will never be + /// joint with whatever comes after them. + pub fn extend_with_tt_alone(&mut self, tt: TokenTreesView<'_>) { + self.extend_with_tt(tt); + fn do_it(tt: &mut [TokenTree]) { + if let Some(TokenTree::Punct { spacing, .. }) = tt.last_mut() { + *spacing = Spacing::Alone; + } + } + if !tt.is_empty() { + dispatch_builder! { + match &mut self.token_trees => tt => do_it(tt) + } + } + } + + pub fn expected_delimiters(&self) -> impl Iterator { + self.unclosed_subtree_indices.iter().rev().map(|&subtree_idx| { + dispatch_builder! { + match &self.token_trees => tt => { + let TokenTree::Subtree { delim_kind, .. } = tt[subtree_idx] else { + unreachable!("unclosed token tree is always a subtree") + }; + delim_kind + } + } + }) + } + + /// Builds, and remove the top subtree if it has only one subtree child. + pub fn build_skip_top_subtree(mut self) -> TopSubtree { + fn remove_first_if_needed( + tt: &mut Vec>, + top_delim_span: &mut DelimSpan, + span_parts: &FxIndexSet, + ) { + let tt_len = tt.len(); + let Some(TokenTree::Subtree { len, open_span, close_span, .. }) = tt.get_mut(1) else { + return; + }; + if (*len as usize) != (tt_len - 2) { + // Subtree does not cover the whole tree (minus 2; itself, and the top span). + return; + } + + // Now we need to adjust the spans, because we assume that the first two spans are always reserved. + let top_open_span = span_parts + .get_index(open_span.span_parts_index() - RESERVED_SPAN_PARTS_LEN) + .unwrap() + .recombine(open_span.text_range()); + let top_close_span = span_parts + .get_index(close_span.span_parts_index() - RESERVED_SPAN_PARTS_LEN) + .unwrap() + .recombine(close_span.text_range()); + *top_delim_span = DelimSpan { open: top_open_span, close: top_close_span }; + // Can't remove the top spans from the map, as maybe they're used by other things as well. + // Now we need to reencode the spans, because their parts index changed: + *open_span = S::new(open_span.text_range(), 0); + *close_span = S::new(close_span.text_range(), 1); + + tt.remove(0); + } + dispatch_builder! { + match &mut self.token_trees => tt => remove_first_if_needed(tt, &mut self.top_subtree_spans, &self.span_parts) + } + self.build() + } + + pub fn build(mut self) -> TopSubtree { + assert!( + self.unclosed_subtree_indices.is_empty(), + "attempt to build an unbalanced `TopSubtreeBuilder`" + ); + fn finish_top_len(tt: &mut [TokenTree]) { + let total_len = tt.len() as u32; + let TokenTree::Subtree { len, .. } = &mut tt[0] else { + unreachable!("first token tree is always a subtree"); + }; + *len = total_len - 1; + } + dispatch_builder! { + match &mut self.token_trees => tt => finish_top_len(tt) + } + + let span_parts = [ + CompressedSpanPart::from_span(&self.top_subtree_spans.open), + CompressedSpanPart::from_span(&self.top_subtree_spans.close), + ] + .into_iter() + .chain(self.span_parts.iter().copied()) + .collect(); + + let repr = match self.token_trees { + TopSubtreeBuilderRepr::SpanStorage32(tt) => { + TopSubtreeRepr::SpanStorage32(tt.into_boxed_slice()) + } + TopSubtreeBuilderRepr::SpanStorage64(tt) => { + TopSubtreeRepr::SpanStorage64(tt.into_boxed_slice()) + } + TopSubtreeBuilderRepr::SpanStorage96(tt) => { + TopSubtreeRepr::SpanStorage96(tt.into_boxed_slice()) + } + }; + + TopSubtree { repr, span_parts } + } + + pub fn restore_point(&self) -> SubtreeBuilderRestorePoint { + let token_trees_len = dispatch_builder! { + match &self.token_trees => tt => tt.len() + }; + SubtreeBuilderRestorePoint { + unclosed_subtree_indices_len: self.unclosed_subtree_indices.len(), + token_trees_len, + last_closed_subtree: self.last_closed_subtree, + } + } + + pub fn restore(&mut self, restore_point: SubtreeBuilderRestorePoint) { + self.unclosed_subtree_indices.truncate(restore_point.unclosed_subtree_indices_len); + dispatch_builder! { + match &mut self.token_trees => tt => tt.truncate(restore_point.token_trees_len) + } + self.last_closed_subtree = restore_point.last_closed_subtree; + } +} + +#[derive(Clone, Copy)] +pub struct SubtreeBuilderRestorePoint { + unclosed_subtree_indices_len: usize, + token_trees_len: usize, + last_closed_subtree: Option, +}