1013: Fuzz reparsing and fix found bugs r=matklad a=pcpthm

Add fuzz test for reparsing which:
- Checks that reparsing doesn't panic and validates the resulting syntax tree.
- Checks that incremental reparsing produces the same syntax tree as full reparse.
- The check that errors are the same as after a full reparse is disabled, because errors are less important than the syntax tree and the check produces failures which I couldn't figure out how to fix immediately (see the FIXME comment).

I guess the current input generation is inefficient, but it still found several bugs:
- Arithmetic overflow (negative result on an unsigned type). I changed the signature of `SyntaxError::add_offset` to solve this problem.
- When reparsing a leaf, the token of the leaf can be joined with the characters that follow it. This case was not considered.
- The UNDERSCORE token was not produced when the text length was exactly 1 (not a reparsing bug).
- When reparsing a block, *inner* curly braces should be balanced. i.e. `{}{}` is invalid.
- Effects of deleting newlines were not considered.

Co-authored-by: pcpthm <pcpthm@gmail.com>
This commit is contained in:
bors[bot] 2019-03-22 05:48:55 +00:00
commit 2a6544f906
15 changed files with 149 additions and 24 deletions

View File

@ -4,14 +4,15 @@ name = "ra_syntax-fuzz"
version = "0.0.1"
authors = ["rust-analyzer developers"]
publish = false
edition = "2018"
[package.metadata]
cargo-fuzz = true
[dependencies.ra_syntax]
path = ".."
[dependencies.libfuzzer-sys]
git = "https://github.com/rust-fuzz/libfuzzer-sys.git"
[dependencies]
ra_syntax = { path = ".." }
ra_text_edit = { path = "../../ra_text_edit" }
libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer-sys.git" }
# Prevent this from interfering with workspaces
[workspace]
@ -20,3 +21,7 @@ members = ["."]
[[bin]]
name = "parser"
path = "fuzz_targets/parser.rs"
[[bin]]
name = "reparse"
path = "fuzz_targets/reparse.rs"

View File

@ -1,9 +1,9 @@
#![no_main]
#[macro_use] extern crate libfuzzer_sys;
extern crate ra_syntax;
use libfuzzer_sys::fuzz_target;
use ra_syntax::fuzz::check_parser;
fuzz_target!(|data: &[u8]| {
if let Ok(text) = std::str::from_utf8(data) {
ra_syntax::check_fuzz_invariants(text)
check_parser(text)
}
});

View File

@ -0,0 +1,9 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use ra_syntax::fuzz::CheckReparse;
// Fuzz entry point for incremental reparsing: raw inputs for which
// `CheckReparse::from_data` returns `None` are skipped; every other input is
// executed through `CheckReparse::run`.
fuzz_target!(|data: &[u8]| {
if let Some(check) = CheckReparse::from_data(data) {
check.run();
}
});

View File

@ -0,0 +1,67 @@
use crate::{SourceFile, validation, TextUnit, TextRange, AstNode};
use ra_text_edit::AtomTextEdit;
use std::str::{self, FromStr};
/// Runs the invariant checks shared by the fuzz entry points on a parsed file:
/// passes the file's syntax root to `validation::validate_block_structure` and
/// then computes the file's errors.
fn check_file_invariants(file: &SourceFile) {
    validation::validate_block_structure(file.syntax());
    // The errors are computed only for the sake of running that code path;
    // the resulting value is intentionally discarded.
    let _ = file.errors();
}
/// Parses `text` as a source file and runs the file invariant checks on the
/// result.
pub fn check_parser(text: &str) {
    check_file_invariants(&SourceFile::parse(text));
}
/// One incremental-reparsing test case: an original text, a single edit, and
/// the text expected after that edit. Built by `CheckReparse::from_data`.
#[derive(Debug, Clone)]
pub struct CheckReparse {
/// Source text before the edit is applied.
text: String,
/// The edit (a deleted range plus the inserted text) applied to `text`.
edit: AtomTextEdit,
/// `text` with `edit` already applied.
edited_text: String,
}
impl CheckReparse {
    /// Decodes raw fuzzer bytes into a reparse test case.
    ///
    /// The input must be valid UTF-8 and is read line by line:
    ///
    /// 1. the start of the deleted range, counted from the end of the fixed
    ///    `fn main(){\n\t` prefix,
    /// 2. the length of the deleted range,
    /// 3. the text to insert in place of the deleted range,
    /// 4. all remaining lines, which form the body wrapped between the fixed
    ///    prefix and the `\n}` suffix.
    ///
    /// Returns `None` if the input is not UTF-8, a line is missing, a number
    /// does not parse, an offset computation overflows, or the deleted range
    /// is not a valid slice of the wrapped text.
    pub fn from_data(data: &[u8]) -> Option<Self> {
        const PREFIX: &str = "fn main(){\n\t";
        const SUFFIX: &str = "\n}";

        let data = str::from_utf8(data).ok()?;
        let mut lines = data.lines();
        // Both numbers come straight from the fuzzer, so all offset arithmetic
        // is checked: an overflowing input is rejected instead of panicking
        // (or wrapping around in builds without overflow checks).
        let delete_start = usize::from_str(lines.next()?).ok()?.checked_add(PREFIX.len())?;
        let delete_len = usize::from_str(lines.next()?).ok()?;
        let delete_end = delete_start.checked_add(delete_len)?;
        let insert = lines.next()?.to_string();
        let body = lines.collect::<Vec<_>>().join("\n");
        let text = format!("{}{}{}", PREFIX, body, SUFFIX);
        // Make sure the deleted range is a valid slice of `text`; this also
        // guarantees that the indexing below cannot panic.
        text.get(delete_start..delete_end)?;
        let delete = TextRange::offset_len(
            TextUnit::from_usize(delete_start),
            TextUnit::from_usize(delete_len),
        );
        let edited_text = format!("{}{}{}", &text[..delete_start], &insert, &text[delete_end..]);
        let edit = AtomTextEdit { delete, insert };
        Some(CheckReparse { text, edit, edited_text })
    }

    /// Runs the test case and panics if any expectation is violated.
    ///
    /// Parses the original text, reparses it with the edit, and then checks
    /// that:
    ///
    /// * the reparsed file passes the file invariant checks,
    /// * the text of the reparsed tree equals the expected edited text,
    /// * walking the reparsed tree and a tree parsed from scratch in lockstep
    ///   yields nodes with equal kind and range.
    ///
    /// On a tree mismatch all three trees are dumped to stderr before the
    /// failing assertion fires.
    pub fn run(&self) {
        let file = SourceFile::parse(&self.text);
        let new_file = file.reparse(&self.edit);
        check_file_invariants(&new_file);
        assert_eq!(&new_file.syntax().text().to_string(), &self.edited_text);

        let full_reparse = SourceFile::parse(&self.edited_text);
        for (a, b) in new_file.syntax().descendants().zip(full_reparse.syntax().descendants()) {
            if (a.kind(), a.range()) != (b.kind(), b.range()) {
                eprint!("original:\n{}", file.syntax().debug_dump());
                eprint!("reparsed:\n{}", new_file.syntax().debug_dump());
                eprint!("full reparse:\n{}", full_reparse.syntax().debug_dump());
                assert_eq!(
                    format!("{:?}", a),
                    format!("{:?}", b),
                    "different syntax tree produced by the full reparse"
                );
            }
        }
        // FIXME: the comparison of errors is currently disabled.
        // assert_eq!(new_file.errors(), full_reparse.errors());
    }
}

View File

@ -29,6 +29,8 @@ mod ptr;
pub mod algo;
pub mod ast;
#[doc(hidden)]
pub mod fuzz;
pub use rowan::{SmolStr, TextRange, TextUnit};
pub use ra_parser::SyntaxKind;
@ -83,13 +85,6 @@ impl SourceFile {
}
}
pub fn check_fuzz_invariants(text: &str) {
let file = SourceFile::parse(text);
let root = file.syntax();
validation::validate_block_structure(root);
let _ = file.errors();
}
/// This test does not assert anything and instead just shows off the crate's
/// API.
#[test]

View File

@ -195,6 +195,7 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind {
ptr.bump();
true
}
('_', None) => return UNDERSCORE,
('_', Some(c)) if !is_ident_continue(c) => return UNDERSCORE,
_ => false,
};

View File

@ -33,12 +33,19 @@ pub(crate) fn incremental_reparse(
}
fn reparse_leaf<'node>(
node: &'node SyntaxNode,
root: &'node SyntaxNode,
edit: &AtomTextEdit,
) -> Option<(&'node SyntaxNode, GreenNode, Vec<SyntaxError>)> {
let node = algo::find_covering_node(node, edit.delete);
let node = algo::find_covering_node(root, edit.delete);
match node.kind() {
WHITESPACE | COMMENT | IDENT | STRING | RAW_STRING => {
if node.kind() == WHITESPACE || node.kind() == COMMENT {
// removing a newline may extend the previous token
if node.text().to_string()[edit.delete - node.range().start()].contains('\n') {
return None;
}
}
let text = get_text_after_edit(node, &edit);
let tokens = tokenize(&text);
let token = match tokens[..] {
@ -50,6 +57,13 @@ fn reparse_leaf<'node>(
return None;
}
if let Some(next_char) = root.text().char_at(node.range().end()) {
let tokens_with_next_char = tokenize(&format!("{}{}", text, next_char));
if tokens_with_next_char.len() == 1 {
return None;
}
}
let green = GreenNode::new_leaf(node.kind(), text.into());
let new_errors = vec![];
Some((node, green, new_errors))
@ -104,7 +118,7 @@ fn is_balanced(tokens: &[Token]) -> bool {
return false;
}
let mut balance = 0usize;
for t in tokens.iter() {
for t in &tokens[1..tokens.len() - 1] {
match t.kind {
L_CURLY => balance += 1,
R_CURLY => {
@ -130,11 +144,11 @@ fn merge_errors(
if e.offset() <= old_node.range().start() {
res.push(e)
} else if e.offset() >= old_node.range().end() {
res.push(e.add_offset(TextUnit::of_str(&edit.insert) - edit.delete.len()));
res.push(e.add_offset(TextUnit::of_str(&edit.insert), edit.delete.len()));
}
}
for e in new_errors {
res.push(e.add_offset(old_node.range().start()));
res.push(e.add_offset(old_node.range().start(), 0.into()));
}
res
}

View File

@ -48,10 +48,10 @@ impl SyntaxError {
}
}
pub fn add_offset(mut self, plus_offset: TextUnit) -> SyntaxError {
pub fn add_offset(mut self, plus_offset: TextUnit, minus_offset: TextUnit) -> SyntaxError {
self.location = match self.location {
Location::Range(range) => Location::Range(range + plus_offset),
Location::Offset(offset) => Location::Offset(offset + plus_offset),
Location::Range(range) => Location::Range(range + plus_offset - minus_offset),
Location::Offset(offset) => Location::Offset(offset + plus_offset - minus_offset),
};
self

View File

@ -0,0 +1,6 @@
0
1
0

View File

@ -0,0 +1,4 @@
0
1
bb"

View File

@ -0,0 +1,4 @@
1
1
""!

View File

@ -0,0 +1,4 @@
0
0
}
{;

View File

@ -0,0 +1,7 @@
05
1
b'

View File

@ -8,7 +8,7 @@ use std::{
};
use test_utils::{project_dir, dir_tests, read_text, collect_tests};
use ra_syntax::{SourceFile, AstNode, check_fuzz_invariants};
use ra_syntax::{SourceFile, AstNode, fuzz};
#[test]
fn lexer_tests() {
@ -47,7 +47,16 @@ fn parser_tests() {
#[test]
fn parser_fuzz_tests() {
for (_, text) in collect_tests(&test_data_dir(), &["parser/fuzz-failures"]) {
check_fuzz_invariants(&text)
fuzz::check_parser(&text)
}
}
/// Replays every stored input collected from `reparse/fuzz-failures` through
/// the reparse check. Each decoded case is printed before it runs.
#[test]
fn reparse_fuzz_tests() {
    let failures = collect_tests(&test_data_dir(), &["reparse/fuzz-failures"]);
    for (_, input) in failures {
        let check = fuzz::CheckReparse::from_data(input.as_bytes()).unwrap();
        println!("{:?}", check);
        check.run();
    }
}