syntax/
lib.rs

1//! Syntax Tree library used throughout the rust-analyzer.
2//!
3//! Properties:
4//!   - easy and fast incremental re-parsing
5//!   - graceful handling of errors
6//!   - full-fidelity representation (*any* text can be precisely represented as
7//!     a syntax tree)
8//!
9//! For more information, see the [RFC]. Current implementation is inspired by
10//! the [Swift] one.
11//!
//! The most interesting modules here are `syntax_node` (which defines the
//! concrete syntax tree) and `ast` (which defines the abstract syntax tree on
//! top of the CST). The actual parser lives in a separate `parser` crate,
//! though the lexer lives in this crate.
16//!
17//! See `api_walkthrough` test in this file for a quick API tour!
18//!
19//! [RFC]: <https://github.com/rust-lang/rfcs/pull/2256>
20//! [Swift]: <https://github.com/apple/swift/blob/13d593df6f359d0cb2fc81cfaac273297c539455/lib/Syntax/README.md>
21
22#![cfg_attr(feature = "in-rust-tree", feature(rustc_private))]
23
24#[cfg(feature = "in-rust-tree")]
25extern crate rustc_driver as _;
26
27mod parsing;
28mod ptr;
29mod syntax_error;
30mod syntax_node;
31#[cfg(test)]
32mod tests;
33mod token_text;
34mod validation;
35
36pub mod algo;
37pub mod ast;
38#[doc(hidden)]
39pub mod fuzz;
40pub mod hacks;
41pub mod syntax_editor;
42pub mod ted;
43pub mod utils;
44
45use std::{marker::PhantomData, ops::Range};
46
47use stdx::format_to;
48use triomphe::Arc;
49
50pub use crate::{
51    ast::{AstNode, AstToken},
52    ptr::{AstPtr, SyntaxNodePtr},
53    syntax_error::SyntaxError,
54    syntax_node::{
55        PreorderWithTokens, RustLanguage, SyntaxElement, SyntaxElementChildren, SyntaxNode,
56        SyntaxNodeChildren, SyntaxToken, SyntaxTreeBuilder,
57    },
58    token_text::TokenText,
59};
60pub use parser::{Edition, SyntaxKind, T};
61pub use rowan::{
62    Direction, GreenNode, NodeOrToken, SyntaxText, TextRange, TextSize, TokenAtOffset, WalkEvent,
63    api::Preorder,
64};
65pub use rustc_literal_escaper as unescape;
66pub use smol_str::{SmolStr, SmolStrBuilder, ToSmolStr, format_smolstr};
67
68/// `Parse` is the result of the parsing: a syntax tree and a collection of
69/// errors.
70///
71/// Note that we always produce a syntax tree, even for completely invalid
72/// files.
73#[derive(Debug, PartialEq, Eq)]
74pub struct Parse<T> {
75    green: Option<GreenNode>,
76    errors: Option<Arc<[SyntaxError]>>,
77    _ty: PhantomData<fn() -> T>,
78}
79
80impl<T> Clone for Parse<T> {
81    fn clone(&self) -> Parse<T> {
82        Parse { green: self.green.clone(), errors: self.errors.clone(), _ty: PhantomData }
83    }
84}
85
86impl<T> Parse<T> {
87    fn new(green: GreenNode, errors: Vec<SyntaxError>) -> Parse<T> {
88        Parse {
89            green: Some(green),
90            errors: if errors.is_empty() { None } else { Some(errors.into()) },
91            _ty: PhantomData,
92        }
93    }
94
95    pub fn syntax_node(&self) -> SyntaxNode {
96        SyntaxNode::new_root(self.green.as_ref().unwrap().clone())
97    }
98
99    pub fn errors(&self) -> Vec<SyntaxError> {
100        let mut errors = if let Some(e) = self.errors.as_deref() { e.to_vec() } else { vec![] };
101        validation::validate(&self.syntax_node(), &mut errors);
102        errors
103    }
104}
105
106impl<T: AstNode> Parse<T> {
107    /// Converts this parse result into a parse result for an untyped syntax tree.
108    pub fn to_syntax(mut self) -> Parse<SyntaxNode> {
109        let green = self.green.take();
110        let errors = self.errors.take();
111        Parse { green, errors, _ty: PhantomData }
112    }
113
114    /// Gets the parsed syntax tree as a typed ast node.
115    ///
116    /// # Panics
117    ///
118    /// Panics if the root node cannot be casted into the typed ast node
119    /// (e.g. if it's an `ERROR` node).
120    pub fn tree(&self) -> T {
121        T::cast(self.syntax_node()).unwrap()
122    }
123
124    /// Converts from `Parse<T>` to [`Result<T, Vec<SyntaxError>>`].
125    pub fn ok(self) -> Result<T, Vec<SyntaxError>> {
126        match self.errors() {
127            errors if !errors.is_empty() => Err(errors),
128            _ => Ok(self.tree()),
129        }
130    }
131}
132
133impl Parse<SyntaxNode> {
134    pub fn cast<N: AstNode>(mut self) -> Option<Parse<N>> {
135        if N::cast(self.syntax_node()).is_some() {
136            Some(Parse { green: self.green.take(), errors: self.errors.take(), _ty: PhantomData })
137        } else {
138            None
139        }
140    }
141}
142
143impl Parse<SourceFile> {
144    pub fn debug_dump(&self) -> String {
145        let mut buf = format!("{:#?}", self.tree().syntax());
146        for err in self.errors() {
147            format_to!(buf, "error {:?}: {}\n", err.range(), err);
148        }
149        buf
150    }
151
152    pub fn reparse(&self, delete: TextRange, insert: &str, edition: Edition) -> Parse<SourceFile> {
153        self.incremental_reparse(delete, insert, edition)
154            .unwrap_or_else(|| self.full_reparse(delete, insert, edition))
155    }
156
157    fn incremental_reparse(
158        &self,
159        delete: TextRange,
160        insert: &str,
161        edition: Edition,
162    ) -> Option<Parse<SourceFile>> {
163        // FIXME: validation errors are not handled here
164        parsing::incremental_reparse(
165            self.tree().syntax(),
166            delete,
167            insert,
168            self.errors.as_deref().unwrap_or_default().iter().cloned(),
169            edition,
170        )
171        .map(|(green_node, errors, _reparsed_range)| Parse {
172            green: Some(green_node),
173            errors: if errors.is_empty() { None } else { Some(errors.into()) },
174            _ty: PhantomData,
175        })
176    }
177
178    fn full_reparse(&self, delete: TextRange, insert: &str, edition: Edition) -> Parse<SourceFile> {
179        let mut text = self.tree().syntax().text().to_string();
180        text.replace_range(Range::<usize>::from(delete), insert);
181        SourceFile::parse(&text, edition)
182    }
183}
184
185impl ast::Expr {
186    /// Parses an `ast::Expr` from `text`.
187    ///
188    /// Note that if the parsed root node is not a valid expression, [`Parse::tree`] will panic.
189    /// For example:
190    /// ```rust,should_panic
191    /// # use syntax::{ast, Edition};
192    /// ast::Expr::parse("let fail = true;", Edition::CURRENT).tree();
193    /// ```
194    pub fn parse(text: &str, edition: Edition) -> Parse<ast::Expr> {
195        let _p = tracing::info_span!("Expr::parse").entered();
196        let (green, errors) = parsing::parse_text_at(text, parser::TopEntryPoint::Expr, edition);
197        let root = SyntaxNode::new_root(green.clone());
198
199        assert!(
200            ast::Expr::can_cast(root.kind()) || root.kind() == SyntaxKind::ERROR,
201            "{:?} isn't an expression",
202            root.kind()
203        );
204        Parse::new(green, errors)
205    }
206}
207
208#[cfg(not(no_salsa_async_drops))]
209impl<T> Drop for Parse<T> {
210    fn drop(&mut self) {
211        let Some(green) = self.green.take() else {
212            return;
213        };
214        static PARSE_DROP_THREAD: std::sync::OnceLock<std::sync::mpsc::Sender<GreenNode>> =
215            std::sync::OnceLock::new();
216        PARSE_DROP_THREAD
217            .get_or_init(|| {
218                let (sender, receiver) = std::sync::mpsc::channel::<GreenNode>();
219                std::thread::Builder::new()
220                    .name("ParseNodeDropper".to_owned())
221                    .spawn(move || {
222                        loop {
223                            // block on a receive
224                            _ = receiver.recv();
225                            // then drain the entire channel
226                            while receiver.try_recv().is_ok() {}
227                            // and sleep for a bit
228                            std::thread::sleep(std::time::Duration::from_millis(100));
229                        }
230                        // why do this over just a `receiver.iter().for_each(drop)`? To reduce contention on the channel lock.
231                        // otherwise this thread will constantly wake up and sleep again.
232                    })
233                    .unwrap();
234                sender
235            })
236            .send(green)
237            .unwrap();
238    }
239}
240
241/// `SourceFile` represents a parse tree for a single Rust file.
242pub use crate::ast::SourceFile;
243
244impl SourceFile {
245    pub fn parse(text: &str, edition: Edition) -> Parse<SourceFile> {
246        let _p = tracing::info_span!("SourceFile::parse").entered();
247        let (green, errors) = parsing::parse_text(text, edition);
248        let root = SyntaxNode::new_root(green.clone());
249
250        assert_eq!(root.kind(), SyntaxKind::SOURCE_FILE);
251        Parse::new(green, errors)
252    }
253}
254
/// Dispatches on which `ast` type a `SyntaxNode` can be cast to.
///
/// Arms are tried from top to bottom: for each `path::Type(pat) => expr,` arm
/// the macro calls `path::Type::cast` on a clone of the node and evaluates the
/// arm's expression for the first cast that returns `Some`. The final `_` arm
/// is mandatory and is used when no cast succeeds.
///
/// The node may be a plain identifier (`match node { .. }`) or any
/// parenthesized expression (`match (expr) { .. }`); the expression is
/// expanded once per arm.
///
/// # Example:
///
/// ```ignore
/// match_ast! {
///     match node {
///         ast::CallExpr(it) => { ... },
///         ast::MethodCallExpr(it) => { ... },
///         ast::MacroCall(it) => { ... },
///         _ => None,
///     }
/// }
/// ```
#[macro_export]
macro_rules! match_ast {
    // Bare identifier: parenthesize it and defer to the general rule below.
    (match $scrutinee:ident { $($arms:tt)* }) => {
        $crate::match_ast!(match ($scrutinee) { $($arms)* })
    };

    // General rule: expands to an `if let … else if let … else` chain.
    (match ($scrutinee:expr) {
        $( $( $segment:ident )::+ ($binding:pat) => $body:expr, )*
        _ => $fallback:expr $(,)?
    }) => {{
        $( if let Some($binding) = $($segment::)+cast($scrutinee.clone()) { $body } else )*
        { $fallback }
    }};
}
281
/// This test is primarily a guided tour of the crate's API; its assertions
/// only serve to keep the walkthrough honest.
#[test]
fn api_walkthrough() {
    use ast::{HasModuleItem, HasName};

    let source_code = "
        fn foo() {
            1 + 1
        }
    ";
    // `SourceFile` is the main entry point.
    //
    // The `parse` method returns a `Parse` -- a pair of a syntax tree and a list
    // of errors. That is, the syntax tree is constructed even in the presence of
    // errors.
    let parse = SourceFile::parse(source_code, parser::Edition::CURRENT);
    assert!(parse.errors().is_empty());

    // The `tree` method returns an owned syntax node of type `SourceFile`.
    // Owned nodes are cheap: inside, they are `Rc` handles to the underlying data.
    let file: SourceFile = parse.tree();

    // `SourceFile` is the root of the syntax tree. We can iterate the file's items.
    // Let's fetch the `foo` function.
    let mut func = None;
    for item in file.items() {
        match item {
            ast::Item::Fn(f) => func = Some(f),
            _ => unreachable!(),
        }
    }
    let func: ast::Fn = func.unwrap();

    // Each AST node has a bunch of getters for children. All getters return
    // `Option`s though, to account for incomplete code. Some getters are common
    // to several kinds of node. In this case, a trait like `ast::HasName`
    // usually exists. By convention, all ast types should be used with the
    // `ast::` qualifier.
    let name: Option<ast::Name> = func.name();
    let name = name.unwrap();
    assert_eq!(name.text(), "foo");

    // Let's get the `1 + 1` expression!
    let body: ast::BlockExpr = func.body().unwrap();
    let stmt_list: ast::StmtList = body.stmt_list().unwrap();
    let expr: ast::Expr = stmt_list.tail_expr().unwrap();

    // Enums are used to group related ast nodes together, and can be used for
    // matching. However, because there are no public fields, it's possible to
    // match only the top level enum: that is the price we pay for increased API
    // flexibility.
    let bin_expr: &ast::BinExpr = match &expr {
        ast::Expr::BinExpr(e) => e,
        _ => unreachable!(),
    };

    // Besides the "typed" AST API, there's an untyped CST one as well.
    // To switch from AST to CST, call the `.syntax()` method:
    let expr_syntax: &SyntaxNode = expr.syntax();

    // Note how `expr` and `bin_expr` are in fact the same node underneath:
    assert!(expr_syntax == bin_expr.syntax());

    // To go from CST to AST, the `AstNode::cast` function is used:
    let _expr: ast::Expr = match ast::Expr::cast(expr_syntax.clone()) {
        Some(e) => e,
        None => unreachable!(),
    };

    // Each syntax node has two properties. The first is a `SyntaxKind`:
    assert_eq!(expr_syntax.kind(), SyntaxKind::BIN_EXPR);

    // And the second is a text range:
    assert_eq!(expr_syntax.text_range(), TextRange::new(32.into(), 37.into()));

    // You can get a node's text as a `SyntaxText` object, which will traverse the
    // tree collecting the tokens' text:
    let text: SyntaxText = expr_syntax.text();
    assert_eq!(text.to_string(), "1 + 1");

    // There's a bunch of traversal methods on `SyntaxNode`:
    assert_eq!(expr_syntax.parent().as_ref(), Some(stmt_list.syntax()));
    assert_eq!(stmt_list.syntax().first_child_or_token().map(|it| it.kind()), Some(T!['{']));
    assert_eq!(
        expr_syntax.next_sibling_or_token().map(|it| it.kind()),
        Some(SyntaxKind::WHITESPACE)
    );

    // As well as some iterator helpers:
    let f = expr_syntax.ancestors().find_map(ast::Fn::cast);
    assert_eq!(f, Some(func));
    assert!(expr_syntax.siblings_with_tokens(Direction::Next).any(|it| it.kind() == T!['}']));
    assert_eq!(
        expr_syntax.descendants_with_tokens().count(),
        8, // 5 tokens `1`, ` `, `+`, ` `, `1`
           // 2 child literal expressions: `1`, `1`
           // 1 the node itself: `1 + 1`
    );

    // There's also a `preorder_with_tokens` method with a more fine-grained
    // iteration control:
    let mut buf = String::new();
    let mut indent = 0;
    for event in expr_syntax.preorder_with_tokens() {
        match event {
            WalkEvent::Enter(node) => {
                let text = match &node {
                    NodeOrToken::Node(it) => it.text().to_string(),
                    NodeOrToken::Token(it) => it.text().to_owned(),
                };
                format_to!(buf, "{:indent$}{:?} {:?}\n", " ", text, node.kind(), indent = indent);
                indent += 2;
            }
            WalkEvent::Leave(_) => indent -= 2,
        }
    }
    assert_eq!(indent, 0);
    assert_eq!(
        buf.trim(),
        r#"
"1 + 1" BIN_EXPR
  "1" LITERAL
    "1" INT_NUMBER
  " " WHITESPACE
  "+" PLUS
  " " WHITESPACE
  "1" LITERAL
    "1" INT_NUMBER
"#
        .trim()
    );

    // To recursively process the tree, there are three approaches:
    // 1. explicitly call getter methods on AST nodes.
    // 2. use descendants and `AstNode::cast`.
    // 3. use descendants and `match_ast!`.
    //
    // Here's what the second one looks like:
    let exprs_cast: Vec<String> = file
        .syntax()
        .descendants()
        .filter_map(ast::Expr::cast)
        .map(|expr| expr.syntax().text().to_string())
        .collect();

    // And here's the third one, which uses the `match_ast!` macro.
    let mut exprs_visit = Vec::new();
    for node in file.syntax().descendants() {
        match_ast! {
            match node {
                ast::Expr(it) => {
                    let res = it.syntax().text().to_string();
                    exprs_visit.push(res);
                },
                _ => (),
            }
        }
    }
    // Both approaches must visit the same expressions in the same order.
    assert_eq!(exprs_cast, exprs_visit);
}