mirror of
https://github.com/rust-lang/rust-analyzer.git
synced 2025-09-28 11:20:54 +00:00
Merge #1013
1013: Fuzz reparsing and fix found bugs r=matklad a=pcpthm Add a fuzz test for reparsing which: - Checks that reparsing doesn't panic and validates the resulting syntax tree. - Checks that incremental reparsing produces the same syntax tree as a full reparse. - The check that errors are the same as those from a full reparse is disabled, because errors are less important than the syntax tree and the check produces failures which I couldn't figure out how to fix immediately (see the FIXME comment). I guess the current input generation is inefficient, but it still found several bugs: - Arithmetic overflow (negative result on an unsigned type). I changed the signature of `SyntaxError::add_offset` to solve this problem. - When reparsing a leaf, the token of the leaf can be joined to the next characters. This case was not considered. - The UNDERSCORE token was not produced when the text length is exactly 1 (not a reparsing bug). - When reparsing a block, the *inner* curly braces should be balanced, i.e. `{}{}` is invalid. - The effects of deleting newlines were not considered. Co-authored-by: pcpthm <pcpthm@gmail.com>
This commit is contained in:
commit
2a6544f906
@ -4,14 +4,15 @@ name = "ra_syntax-fuzz"
|
||||
version = "0.0.1"
|
||||
authors = ["rust-analyzer developers"]
|
||||
publish = false
|
||||
edition = "2018"
|
||||
|
||||
[package.metadata]
|
||||
cargo-fuzz = true
|
||||
|
||||
[dependencies.ra_syntax]
|
||||
path = ".."
|
||||
[dependencies.libfuzzer-sys]
|
||||
git = "https://github.com/rust-fuzz/libfuzzer-sys.git"
|
||||
[dependencies]
|
||||
ra_syntax = { path = ".." }
|
||||
ra_text_edit = { path = "../../ra_text_edit" }
|
||||
libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer-sys.git" }
|
||||
|
||||
# Prevent this from interfering with workspaces
|
||||
[workspace]
|
||||
@ -20,3 +21,7 @@ members = ["."]
|
||||
[[bin]]
|
||||
name = "parser"
|
||||
path = "fuzz_targets/parser.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "reparse"
|
||||
path = "fuzz_targets/reparse.rs"
|
||||
|
@ -1,9 +1,9 @@
|
||||
#![no_main]
|
||||
#[macro_use] extern crate libfuzzer_sys;
|
||||
extern crate ra_syntax;
|
||||
use libfuzzer_sys::fuzz_target;
|
||||
use ra_syntax::fuzz::check_parser;
|
||||
|
||||
fuzz_target!(|data: &[u8]| {
|
||||
if let Ok(text) = std::str::from_utf8(data) {
|
||||
ra_syntax::check_fuzz_invariants(text)
|
||||
check_parser(text)
|
||||
}
|
||||
});
|
||||
|
9
crates/ra_syntax/fuzz/fuzz_targets/reparse.rs
Normal file
9
crates/ra_syntax/fuzz/fuzz_targets/reparse.rs
Normal file
@ -0,0 +1,9 @@
|
||||
#![no_main]
|
||||
use libfuzzer_sys::fuzz_target;
|
||||
use ra_syntax::fuzz::CheckReparse;
|
||||
|
||||
// Fuzz entry point: decode the raw bytes into a reparse scenario and run it.
fuzz_target!(|data: &[u8]| {
    // Inputs that do not decode into a scenario are skipped.
    let check = match CheckReparse::from_data(data) {
        Some(check) => check,
        None => return,
    };
    check.run();
});
|
67
crates/ra_syntax/src/fuzz.rs
Normal file
67
crates/ra_syntax/src/fuzz.rs
Normal file
@ -0,0 +1,67 @@
|
||||
use crate::{SourceFile, validation, TextUnit, TextRange, AstNode};
|
||||
use ra_text_edit::AtomTextEdit;
|
||||
use std::str::{self, FromStr};
|
||||
|
||||
fn check_file_invariants(file: &SourceFile) {
|
||||
let root = file.syntax();
|
||||
validation::validate_block_structure(root);
|
||||
let _ = file.errors();
|
||||
}
|
||||
|
||||
pub fn check_parser(text: &str) {
|
||||
let file = SourceFile::parse(text);
|
||||
check_file_invariants(&file);
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CheckReparse {
|
||||
text: String,
|
||||
edit: AtomTextEdit,
|
||||
edited_text: String,
|
||||
}
|
||||
|
||||
impl CheckReparse {
|
||||
pub fn from_data(data: &[u8]) -> Option<Self> {
|
||||
const PREFIX: &'static str = "fn main(){\n\t";
|
||||
const SUFFIX: &'static str = "\n}";
|
||||
|
||||
let data = str::from_utf8(data).ok()?;
|
||||
let mut lines = data.lines();
|
||||
let delete_start = usize::from_str(lines.next()?).ok()? + PREFIX.len();
|
||||
let delete_len = usize::from_str(lines.next()?).ok()?;
|
||||
let insert = lines.next()?.to_string();
|
||||
let text = lines.collect::<Vec<_>>().join("\n");
|
||||
let text = format!("{}{}{}", PREFIX, text, SUFFIX);
|
||||
text.get(delete_start..delete_start.checked_add(delete_len)?)?; // make sure delete is a valid range
|
||||
let delete = TextRange::offset_len(
|
||||
TextUnit::from_usize(delete_start),
|
||||
TextUnit::from_usize(delete_len),
|
||||
);
|
||||
let edited_text =
|
||||
format!("{}{}{}", &text[..delete_start], &insert, &text[delete_start + delete_len..]);
|
||||
let edit = AtomTextEdit { delete, insert };
|
||||
Some(CheckReparse { text, edit, edited_text })
|
||||
}
|
||||
|
||||
pub fn run(&self) {
|
||||
let file = SourceFile::parse(&self.text);
|
||||
let new_file = file.reparse(&self.edit);
|
||||
check_file_invariants(&new_file);
|
||||
assert_eq!(&new_file.syntax().text().to_string(), &self.edited_text);
|
||||
let full_reparse = SourceFile::parse(&self.edited_text);
|
||||
for (a, b) in new_file.syntax().descendants().zip(full_reparse.syntax().descendants()) {
|
||||
if (a.kind(), a.range()) != (b.kind(), b.range()) {
|
||||
eprint!("original:\n{}", file.syntax().debug_dump());
|
||||
eprint!("reparsed:\n{}", new_file.syntax().debug_dump());
|
||||
eprint!("full reparse:\n{}", full_reparse.syntax().debug_dump());
|
||||
assert_eq!(
|
||||
format!("{:?}", a),
|
||||
format!("{:?}", b),
|
||||
"different syntax tree produced by the full reparse"
|
||||
);
|
||||
}
|
||||
}
|
||||
// FIXME
|
||||
// assert_eq!(new_file.errors(), full_reparse.errors());
|
||||
}
|
||||
}
|
@ -29,6 +29,8 @@ mod ptr;
|
||||
|
||||
pub mod algo;
|
||||
pub mod ast;
|
||||
#[doc(hidden)]
|
||||
pub mod fuzz;
|
||||
|
||||
pub use rowan::{SmolStr, TextRange, TextUnit};
|
||||
pub use ra_parser::SyntaxKind;
|
||||
@ -83,13 +85,6 @@ impl SourceFile {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_fuzz_invariants(text: &str) {
|
||||
let file = SourceFile::parse(text);
|
||||
let root = file.syntax();
|
||||
validation::validate_block_structure(root);
|
||||
let _ = file.errors();
|
||||
}
|
||||
|
||||
/// This test does not assert anything and instead just shows off the crate's
|
||||
/// API.
|
||||
#[test]
|
||||
|
@ -195,6 +195,7 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind {
|
||||
ptr.bump();
|
||||
true
|
||||
}
|
||||
('_', None) => return UNDERSCORE,
|
||||
('_', Some(c)) if !is_ident_continue(c) => return UNDERSCORE,
|
||||
_ => false,
|
||||
};
|
||||
|
@ -33,12 +33,19 @@ pub(crate) fn incremental_reparse(
|
||||
}
|
||||
|
||||
fn reparse_leaf<'node>(
|
||||
node: &'node SyntaxNode,
|
||||
root: &'node SyntaxNode,
|
||||
edit: &AtomTextEdit,
|
||||
) -> Option<(&'node SyntaxNode, GreenNode, Vec<SyntaxError>)> {
|
||||
let node = algo::find_covering_node(node, edit.delete);
|
||||
let node = algo::find_covering_node(root, edit.delete);
|
||||
match node.kind() {
|
||||
WHITESPACE | COMMENT | IDENT | STRING | RAW_STRING => {
|
||||
if node.kind() == WHITESPACE || node.kind() == COMMENT {
|
||||
// removing a newline may extend the previous token
|
||||
if node.text().to_string()[edit.delete - node.range().start()].contains('\n') {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
let text = get_text_after_edit(node, &edit);
|
||||
let tokens = tokenize(&text);
|
||||
let token = match tokens[..] {
|
||||
@ -50,6 +57,13 @@ fn reparse_leaf<'node>(
|
||||
return None;
|
||||
}
|
||||
|
||||
if let Some(next_char) = root.text().char_at(node.range().end()) {
|
||||
let tokens_with_next_char = tokenize(&format!("{}{}", text, next_char));
|
||||
if tokens_with_next_char.len() == 1 {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
let green = GreenNode::new_leaf(node.kind(), text.into());
|
||||
let new_errors = vec![];
|
||||
Some((node, green, new_errors))
|
||||
@ -104,7 +118,7 @@ fn is_balanced(tokens: &[Token]) -> bool {
|
||||
return false;
|
||||
}
|
||||
let mut balance = 0usize;
|
||||
for t in tokens.iter() {
|
||||
for t in &tokens[1..tokens.len() - 1] {
|
||||
match t.kind {
|
||||
L_CURLY => balance += 1,
|
||||
R_CURLY => {
|
||||
@ -130,11 +144,11 @@ fn merge_errors(
|
||||
if e.offset() <= old_node.range().start() {
|
||||
res.push(e)
|
||||
} else if e.offset() >= old_node.range().end() {
|
||||
res.push(e.add_offset(TextUnit::of_str(&edit.insert) - edit.delete.len()));
|
||||
res.push(e.add_offset(TextUnit::of_str(&edit.insert), edit.delete.len()));
|
||||
}
|
||||
}
|
||||
for e in new_errors {
|
||||
res.push(e.add_offset(old_node.range().start()));
|
||||
res.push(e.add_offset(old_node.range().start(), 0.into()));
|
||||
}
|
||||
res
|
||||
}
|
||||
|
@ -48,10 +48,10 @@ impl SyntaxError {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_offset(mut self, plus_offset: TextUnit) -> SyntaxError {
|
||||
pub fn add_offset(mut self, plus_offset: TextUnit, minus_offset: TextUnit) -> SyntaxError {
|
||||
self.location = match self.location {
|
||||
Location::Range(range) => Location::Range(range + plus_offset),
|
||||
Location::Offset(offset) => Location::Offset(offset + plus_offset),
|
||||
Location::Range(range) => Location::Range(range + plus_offset - minus_offset),
|
||||
Location::Offset(offset) => Location::Offset(offset + plus_offset - minus_offset),
|
||||
};
|
||||
|
||||
self
|
||||
|
@ -0,0 +1,6 @@
|
||||
0
|
||||
1
|
||||
|
||||
|
||||
|
||||
0
|
@ -0,0 +1,4 @@
|
||||
0
|
||||
1
|
||||
|
||||
bb"
|
@ -0,0 +1,4 @@
|
||||
1
|
||||
1
|
||||
|
||||
""!
|
BIN
crates/ra_syntax/tests/data/reparse/fuzz-failures/0003.rs
Normal file
BIN
crates/ra_syntax/tests/data/reparse/fuzz-failures/0003.rs
Normal file
Binary file not shown.
@ -0,0 +1,4 @@
|
||||
0
|
||||
0
|
||||
}
|
||||
{;
|
@ -0,0 +1,7 @@
|
||||
05
|
||||
1
|
||||
|
||||
|
||||
|
||||
b'
|
||||
|
@ -8,7 +8,7 @@ use std::{
|
||||
};
|
||||
|
||||
use test_utils::{project_dir, dir_tests, read_text, collect_tests};
|
||||
use ra_syntax::{SourceFile, AstNode, check_fuzz_invariants};
|
||||
use ra_syntax::{SourceFile, AstNode, fuzz};
|
||||
|
||||
#[test]
|
||||
fn lexer_tests() {
|
||||
@ -47,7 +47,16 @@ fn parser_tests() {
|
||||
#[test]
|
||||
fn parser_fuzz_tests() {
|
||||
for (_, text) in collect_tests(&test_data_dir(), &["parser/fuzz-failures"]) {
|
||||
check_fuzz_invariants(&text)
|
||||
fuzz::check_parser(&text)
|
||||
}
|
||||
}
|
||||
|
||||
/// Replays every checked-in reparse fuzz failure as a regression test.
#[test]
fn reparse_fuzz_tests() {
    for (_path, input) in collect_tests(&test_data_dir(), &["reparse/fuzz-failures"]) {
        // Each regression file is expected to decode into a scenario.
        let scenario = fuzz::CheckReparse::from_data(input.as_bytes()).unwrap();
        println!("{:?}", scenario);
        scenario.run();
    }
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user