Merge #1013

1013: Fuzz reparsing and fix found bugs r=matklad a=pcpthm Add a fuzz test for reparsing which: - Checks that reparsing doesn't panic and validates the resulting syntax tree. - Checks that incremental reparsing produces the same syntax tree as a full reparse. - The check that errors are the same as in a full reparse is disabled, because errors are less important than the syntax tree and the check produces failures which I couldn't figure out how to fix immediately (see the FIXME comment). I guess the current input generation is inefficient, but it still found several bugs: - Arithmetic overflow (negative result on an unsigned type). I changed the signature of `SyntaxError::add_offset` to solve this problem. - When reparsing a leaf, the token of the leaf can be joined with the following characters. This case was not considered. - The UNDERSCORE token was not produced when the text length is exactly 1 (not a reparsing bug). - When reparsing a block, the *inner* curly braces should be balanced, i.e. `{}{}` is invalid. - The effects of deleting newlines were not considered. Co-authored-by: pcpthm <pcpthm@gmail.com>
2025-09-28 11:20:54 +00:00 · 2019-03-22 05:48:55 +00:00 · 2019-03-22 05:48:55 +00:00 · 2a6544f906
commit 2a6544f906
parent ed823cb38d bf8e7930da
15 changed files with 149 additions and 24 deletions
--- a/crates/ra_syntax/fuzz/Cargo.toml
+++ b/crates/ra_syntax/fuzz/Cargo.toml
@ -4,14 +4,15 @@ name = "ra_syntax-fuzz"
 version = "0.0.1"
 authors = ["rust-analyzer developers"]
 publish = false
+edition = "2018"

 [package.metadata]
 cargo-fuzz = true

-[dependencies.ra_syntax]
-path = ".."
-[dependencies.libfuzzer-sys]
-git = "https://github.com/rust-fuzz/libfuzzer-sys.git"
+[dependencies]
+ra_syntax = { path = ".." }
+ra_text_edit = { path = "../../ra_text_edit" }
+libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer-sys.git" }

 # Prevent this from interfering with workspaces
 [workspace]
@ -20,3 +21,7 @@ members = ["."]
 [[bin]]
 name = "parser"
 path = "fuzz_targets/parser.rs"
+
+[[bin]]
+name = "reparse"
+path = "fuzz_targets/reparse.rs"
--- a/crates/ra_syntax/fuzz/fuzz_targets/parser.rs
+++ b/crates/ra_syntax/fuzz/fuzz_targets/parser.rs
@ -1,9 +1,9 @@
 #![no_main]
-#[macro_use] extern crate libfuzzer_sys;
-extern crate ra_syntax;
+use libfuzzer_sys::fuzz_target;
+use ra_syntax::fuzz::check_parser;

 fuzz_target!(|data: &[u8]| {
    if let Ok(text) = std::str::from_utf8(data) {
-        ra_syntax::check_fuzz_invariants(text)
+        check_parser(text)
    }
 });
--- a/crates/ra_syntax/fuzz/fuzz_targets/reparse.rs
+++ b/crates/ra_syntax/fuzz/fuzz_targets/reparse.rs
@ -0,0 +1,9 @@
+#![no_main]
+use libfuzzer_sys::fuzz_target;
+use ra_syntax::fuzz::CheckReparse;
+
+fuzz_target!(|data: &[u8]| {
+    if let Some(check) = CheckReparse::from_data(data) {
+        check.run();
+    }
+});
--- a/crates/ra_syntax/src/fuzz.rs
+++ b/crates/ra_syntax/src/fuzz.rs
@ -0,0 +1,67 @@
+use crate::{SourceFile, validation, TextUnit, TextRange, AstNode};
+use ra_text_edit::AtomTextEdit;
+use std::str::{self, FromStr};
+
+fn check_file_invariants(file: &SourceFile) {
+    let root = file.syntax();
+    validation::validate_block_structure(root);
+    let _ = file.errors();
+}
+
+pub fn check_parser(text: &str) {
+    let file = SourceFile::parse(text);
+    check_file_invariants(&file);
+}
+
+#[derive(Debug, Clone)]
+pub struct CheckReparse {
+    text: String,
+    edit: AtomTextEdit,
+    edited_text: String,
+}
+
+impl CheckReparse {
+    pub fn from_data(data: &[u8]) -> Option<Self> {
+        const PREFIX: &'static str = "fn main(){\n\t";
+        const SUFFIX: &'static str = "\n}";
+
+        let data = str::from_utf8(data).ok()?;
+        let mut lines = data.lines();
+        let delete_start = usize::from_str(lines.next()?).ok()? + PREFIX.len();
+        let delete_len = usize::from_str(lines.next()?).ok()?;
+        let insert = lines.next()?.to_string();
+        let text = lines.collect::<Vec<_>>().join("\n");
+        let text = format!("{}{}{}", PREFIX, text, SUFFIX);
+        text.get(delete_start..delete_start.checked_add(delete_len)?)?; // make sure delete is a valid range
+        let delete = TextRange::offset_len(
+            TextUnit::from_usize(delete_start),
+            TextUnit::from_usize(delete_len),
+        );
+        let edited_text =
+            format!("{}{}{}", &text[..delete_start], &insert, &text[delete_start + delete_len..]);
+        let edit = AtomTextEdit { delete, insert };
+        Some(CheckReparse { text, edit, edited_text })
+    }
+
+    pub fn run(&self) {
+        let file = SourceFile::parse(&self.text);
+        let new_file = file.reparse(&self.edit);
+        check_file_invariants(&new_file);
+        assert_eq!(&new_file.syntax().text().to_string(), &self.edited_text);
+        let full_reparse = SourceFile::parse(&self.edited_text);
+        for (a, b) in new_file.syntax().descendants().zip(full_reparse.syntax().descendants()) {
+            if (a.kind(), a.range()) != (b.kind(), b.range()) {
+                eprint!("original:\n{}", file.syntax().debug_dump());
+                eprint!("reparsed:\n{}", new_file.syntax().debug_dump());
+                eprint!("full reparse:\n{}", full_reparse.syntax().debug_dump());
+                assert_eq!(
+                    format!("{:?}", a),
+                    format!("{:?}", b),
+                    "different syntax tree produced by the full reparse"
+                );
+            }
+        }
+        // FIXME
+        // assert_eq!(new_file.errors(), full_reparse.errors());
+    }
+}
--- a/crates/ra_syntax/src/lib.rs
+++ b/crates/ra_syntax/src/lib.rs
@ -29,6 +29,8 @@ mod ptr;

 pub mod algo;
 pub mod ast;
+#[doc(hidden)]
+pub mod fuzz;

 pub use rowan::{SmolStr, TextRange, TextUnit};
 pub use ra_parser::SyntaxKind;
@ -83,13 +85,6 @@ impl SourceFile {
    }
 }

-pub fn check_fuzz_invariants(text: &str) {
-    let file = SourceFile::parse(text);
-    let root = file.syntax();
-    validation::validate_block_structure(root);
-    let _ = file.errors();
-}
-
 /// This test does not assert anything and instead just shows off the crate's
 /// API.
 #[test]
--- a/crates/ra_syntax/src/parsing/lexer.rs
+++ b/crates/ra_syntax/src/parsing/lexer.rs
@ -195,6 +195,7 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind {
            ptr.bump();
            true
        }
+        ('_', None) => return UNDERSCORE,
        ('_', Some(c)) if !is_ident_continue(c) => return UNDERSCORE,
        _ => false,
    };
--- a/crates/ra_syntax/src/parsing/reparsing.rs
+++ b/crates/ra_syntax/src/parsing/reparsing.rs
@ -33,12 +33,19 @@ pub(crate) fn incremental_reparse(
 }

 fn reparse_leaf<'node>(
-    node: &'node SyntaxNode,
+    root: &'node SyntaxNode,
    edit: &AtomTextEdit,
 ) -> Option<(&'node SyntaxNode, GreenNode, Vec<SyntaxError>)> {
-    let node = algo::find_covering_node(node, edit.delete);
+    let node = algo::find_covering_node(root, edit.delete);
    match node.kind() {
        WHITESPACE | COMMENT | IDENT | STRING | RAW_STRING => {
+            if node.kind() == WHITESPACE || node.kind() == COMMENT {
+                // removing a newline may extend the previous token
+                if node.text().to_string()[edit.delete - node.range().start()].contains('\n') {
+                    return None;
+                }
+            }
+
            let text = get_text_after_edit(node, &edit);
            let tokens = tokenize(&text);
            let token = match tokens[..] {
@ -50,6 +57,13 @@ fn reparse_leaf<'node>(
                return None;
            }

+            if let Some(next_char) = root.text().char_at(node.range().end()) {
+                let tokens_with_next_char = tokenize(&format!("{}{}", text, next_char));
+                if tokens_with_next_char.len() == 1 {
+                    return None;
+                }
+            }
+
            let green = GreenNode::new_leaf(node.kind(), text.into());
            let new_errors = vec![];
            Some((node, green, new_errors))
@ -104,7 +118,7 @@ fn is_balanced(tokens: &[Token]) -> bool {
        return false;
    }
    let mut balance = 0usize;
-    for t in tokens.iter() {
+    for t in &tokens[1..tokens.len() - 1] {
        match t.kind {
            L_CURLY => balance += 1,
            R_CURLY => {
@ -130,11 +144,11 @@ fn merge_errors(
        if e.offset() <= old_node.range().start() {
            res.push(e)
        } else if e.offset() >= old_node.range().end() {
-            res.push(e.add_offset(TextUnit::of_str(&edit.insert) - edit.delete.len()));
+            res.push(e.add_offset(TextUnit::of_str(&edit.insert), edit.delete.len()));
        }
    }
    for e in new_errors {
-        res.push(e.add_offset(old_node.range().start()));
+        res.push(e.add_offset(old_node.range().start(), 0.into()));
    }
    res
 }
--- a/crates/ra_syntax/src/syntax_error.rs
+++ b/crates/ra_syntax/src/syntax_error.rs
@ -48,10 +48,10 @@ impl SyntaxError {
        }
    }

-    pub fn add_offset(mut self, plus_offset: TextUnit) -> SyntaxError {
+    pub fn add_offset(mut self, plus_offset: TextUnit, minus_offset: TextUnit) -> SyntaxError {
        self.location = match self.location {
-            Location::Range(range) => Location::Range(range + plus_offset),
-            Location::Offset(offset) => Location::Offset(offset + plus_offset),
+            Location::Range(range) => Location::Range(range + plus_offset - minus_offset),
+            Location::Offset(offset) => Location::Offset(offset + plus_offset - minus_offset),
        };

        self
--- a/crates/ra_syntax/tests/data/reparse/fuzz-failures/0000.rs
+++ b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0000.rs
@ -0,0 +1,6 @@
+0
+1
+
+
+
+0
--- a/crates/ra_syntax/tests/data/reparse/fuzz-failures/0001.rs
+++ b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0001.rs
@ -0,0 +1,4 @@
+0
+1
+
+bb"
--- a/crates/ra_syntax/tests/data/reparse/fuzz-failures/0002.rs
+++ b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0002.rs
@ -0,0 +1,4 @@
+1
+1
+
+""!
--- a/crates/ra_syntax/tests/data/reparse/fuzz-failures/0003.rs
+++ b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0003.rs
--- a/crates/ra_syntax/tests/data/reparse/fuzz-failures/0004.rs
+++ b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0004.rs
@ -0,0 +1,4 @@
+0
+0
+}
+{;
--- a/crates/ra_syntax/tests/data/reparse/fuzz-failures/0005.rs
+++ b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0005.rs
@ -0,0 +1,7 @@
+05
+1
+
+
+
+b'
+		
--- a/crates/ra_syntax/tests/test.rs
+++ b/crates/ra_syntax/tests/test.rs
@ -8,7 +8,7 @@ use std::{
 };

 use test_utils::{project_dir, dir_tests, read_text, collect_tests};
-use ra_syntax::{SourceFile, AstNode, check_fuzz_invariants};
+use ra_syntax::{SourceFile, AstNode, fuzz};

 #[test]
 fn lexer_tests() {
@ -47,7 +47,16 @@ fn parser_tests() {
 #[test]
 fn parser_fuzz_tests() {
    for (_, text) in collect_tests(&test_data_dir(), &["parser/fuzz-failures"]) {
-        check_fuzz_invariants(&text)
+        fuzz::check_parser(&text)
+    }
+}
+
+#[test]
+fn reparse_fuzz_tests() {
+    for (_, text) in collect_tests(&test_data_dir(), &["reparse/fuzz-failures"]) {
+        let check = fuzz::CheckReparse::from_data(text.as_bytes()).unwrap();
+        println!("{:?}", check);
+        check.run();
    }
 }