From 793396c624376e5b55bdbe8ce7d1923f68302c7a Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 14 May 2024 11:21:04 +0200 Subject: [PATCH 1/2] Track hashes for file contents --- crates/stdx/src/lib.rs | 4 ++++ crates/vfs/src/lib.rs | 36 ++++++++++++++++++++++++------------ 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/crates/stdx/src/lib.rs b/crates/stdx/src/lib.rs index 0504ca50b8..54f10df42a 100644 --- a/crates/stdx/src/lib.rs +++ b/crates/stdx/src/lib.rs @@ -22,6 +22,10 @@ pub fn is_ci() -> bool { option_env!("CI").is_some() } +pub fn hash_once(thing: impl std::hash::Hash) -> u64 { + std::hash::BuildHasher::hash_one(&std::hash::BuildHasherDefault::::default(), thing) +} + #[must_use] #[allow(clippy::print_stderr)] pub fn timeit(label: &'static str) -> impl Drop { diff --git a/crates/vfs/src/lib.rs b/crates/vfs/src/lib.rs index cb3e058355..5be69d87c8 100644 --- a/crates/vfs/src/lib.rs +++ b/crates/vfs/src/lib.rs @@ -56,6 +56,8 @@ pub use crate::{ }; pub use paths::{AbsPath, AbsPathBuf}; +use rustc_hash::FxHasher; +use stdx::hash_once; use tracing::{span, Level}; /// Handle to a file in [`Vfs`] @@ -106,7 +108,7 @@ pub enum FileState { /// The file has been created this cycle. Created, /// The file exists. - Exists, + Exists(u64), /// The file is deleted. Deleted, } @@ -139,13 +141,13 @@ impl ChangedFile { /// Returns `true` if the change is [`Modify`](ChangeKind::Modify). pub fn is_modified(&self) -> bool { - matches!(self.change, Change::Modify(_)) + matches!(self.change, Change::Modify(_, _)) } pub fn kind(&self) -> ChangeKind { match self.change { Change::Create(_) => ChangeKind::Create, - Change::Modify(_) => ChangeKind::Modify, + Change::Modify(_, _) => ChangeKind::Modify, Change::Delete => ChangeKind::Delete, } } @@ -157,7 +159,7 @@ pub enum Change { /// The file was (re-)created Create(Vec), /// The file was modified - Modify(Vec), + Modify(Vec, u64), /// The file was deleted Delete, } @@ -178,7 +180,7 @@ impl Vfs { pub fn file_id(&self, path: &VfsPath) -> Option { self.interner .get(path) - .filter(|&it| matches!(self.get(it), FileState::Exists | FileState::Created)) + .filter(|&it| matches!(self.get(it), FileState::Exists(_) | FileState::Created)) } /// File path corresponding to the given `file_id`. @@ -197,7 +199,7 @@ impl Vfs { (0..self.data.len()) .map(|it| FileId(it as u32)) .filter(move |&file_id| { - matches!(self.get(file_id), FileState::Exists | FileState::Created) + matches!(self.get(file_id), FileState::Exists(_) | FileState::Created) }) .map(move |file_id| { let path = self.interner.lookup(file_id); @@ -218,8 +220,18 @@ impl Vfs { let change_kind = match (state, contents) { (FileState::Deleted, None) => return false, (FileState::Deleted, Some(v)) => Change::Create(v), - (FileState::Exists | FileState::Created, None) => Change::Delete, - (FileState::Exists | FileState::Created, Some(v)) => Change::Modify(v), + (FileState::Exists(_) | FileState::Created, None) => Change::Delete, + (FileState::Created, Some(v)) => { + let hash = hash_once::(&*v); + Change::Modify(v, hash) + } + (FileState::Exists(hash), Some(v)) => { + let new_hash = hash_once::(&*v); + if new_hash == hash { + return false; + } + Change::Modify(v, new_hash) + } }; self.data[file_id.0 as usize] = match change_kind { Change::Create(_) => { @@ -228,8 +240,8 @@ impl Vfs { } // If the file got created this cycle, make sure we keep it that way even // if a modify comes in - Change::Modify(_) if matches!(state, FileState::Created) => FileState::Created, - Change::Modify(_) => FileState::Exists, + Change::Modify(_, _) if matches!(state, FileState::Created) => FileState::Created, + Change::Modify(_, hash) => FileState::Exists(hash), Change::Delete => FileState::Deleted, }; let changed_file = ChangedFile { file_id, change: change_kind }; @@ -243,7 +255,7 @@ impl Vfs { for file_id in self.created_this_cycle.drain(..) { if self.data[file_id.0 as usize] == FileState::Created { // downgrade the file from `Created` to `Exists` as the cycle is done - self.data[file_id.0 as usize] = FileState::Exists; + self.data[file_id.0 as usize] = FileState::Exists(todo!()); } } mem::take(&mut self.changes) @@ -251,7 +263,7 @@ impl Vfs { /// Provides a panic-less way to verify file_id validity. pub fn exists(&self, file_id: FileId) -> bool { - matches!(self.get(file_id), FileState::Exists | FileState::Created) + matches!(self.get(file_id), FileState::Exists(_) | FileState::Created) } /// Returns the id associated with `path` From 1ca97ba8962b30819429f11c6a9fa17ab6aeeebe Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Tue, 14 May 2024 11:55:12 +0200 Subject: [PATCH 2/2] Hash file contents to verify whether file actually changed --- crates/hir-def/src/item_tree.rs | 5 +- crates/load-cargo/src/lib.rs | 4 +- crates/rust-analyzer/src/global_state.rs | 59 ++++--------- crates/vfs/src/lib.rs | 101 +++++++++++++---------- 4 files changed, 75 insertions(+), 94 deletions(-) diff --git a/crates/hir-def/src/item_tree.rs b/crates/hir-def/src/item_tree.rs index 80064e18fa..acda64c41f 100644 --- a/crates/hir-def/src/item_tree.rs +++ b/crates/hir-def/src/item_tree.rs @@ -29,6 +29,7 @@ //! //! In general, any item in the `ItemTree` stores its `AstId`, which allows mapping it back to its //! surface syntax. +#![allow(unexpected_cfgs)] mod lower; mod pretty; @@ -467,13 +468,12 @@ macro_rules! mod_items { pub enum GenericModItem { $( $( - #[cfg_attr(FALSE, $generic_params)] + #[cfg_attr(ignore_fragment, $generic_params)] $typ(FileItemTreeId<$typ>), )? )+ } - #[allow(unexpected_cfgs)] impl From for ModItem { fn from(id: GenericModItem) -> ModItem { match id { @@ -494,7 +494,6 @@ macro_rules! mod_items { } $( - #[allow(unexpected_cfgs)] impl From> for ModItem { fn from(id: FileItemTreeId<$typ>) -> ModItem { ModItem::$typ(id) diff --git a/crates/load-cargo/src/lib.rs b/crates/load-cargo/src/lib.rs index dc8ea51039..76940ab57a 100644 --- a/crates/load-cargo/src/lib.rs +++ b/crates/load-cargo/src/lib.rs @@ -361,8 +361,8 @@ fn load_crate_graph( } } let changes = vfs.take_changes(); - for file in changes { - if let vfs::Change::Create(v) | vfs::Change::Modify(v) = file.change { + for (_, file) in changes { + if let vfs::Change::Create(v, _) | vfs::Change::Modify(v, _) = file.change { if let Ok(text) = String::from_utf8(v) { analysis_change.change_file(file.file_id, Some(text)) } diff --git a/crates/rust-analyzer/src/global_state.rs b/crates/rust-analyzer/src/global_state.rs index f5b3ef5103..f64e66183d 100644 --- a/crates/rust-analyzer/src/global_state.rs +++ b/crates/rust-analyzer/src/global_state.rs @@ -3,7 +3,7 @@ //! //! Each tick provides an immutable snapshot of the state as `WorldSnapshot`. -use std::{collections::hash_map::Entry, time::Instant}; +use std::time::Instant; use crossbeam_channel::{unbounded, Receiver, Sender}; use flycheck::FlycheckHandle; @@ -25,7 +25,7 @@ use project_model::{ use rustc_hash::{FxHashMap, FxHashSet}; use tracing::{span, Level}; use triomphe::Arc; -use vfs::{AnchoredPathBuf, ChangedFile, Vfs}; +use vfs::{AnchoredPathBuf, Vfs}; use crate::{ config::{Config, ConfigError}, @@ -254,7 +254,6 @@ impl GlobalState { pub(crate) fn process_changes(&mut self) -> bool { let _p = span!(Level::INFO, "GlobalState::process_changes").entered(); - let mut file_changes = FxHashMap::<_, ChangedFile>::default(); let (change, modified_rust_files, workspace_structure_change) = { let mut change = ChangeWithProcMacros::new(); let mut guard = self.vfs.write(); @@ -266,44 +265,13 @@ impl GlobalState { // downgrade to read lock to allow more readers while we are normalizing text let guard = RwLockWriteGuard::downgrade_to_upgradable(guard); let vfs: &Vfs = &guard.0; - // We need to fix up the changed events a bit. If we have a create or modify for a file - // id that is followed by a delete we actually skip observing the file text from the - // earlier event, to avoid problems later on. - for changed_file in changed_files { - use vfs::Change::*; - match file_changes.entry(changed_file.file_id) { - Entry::Occupied(mut o) => { - let change = o.get_mut(); - match (&mut change.change, changed_file.change) { - // latter `Delete` wins - (change, Delete) => *change = Delete, - // merge `Create` with `Create` or `Modify` - (Create(prev), Create(new) | Modify(new)) => *prev = new, - // collapse identical `Modify`es - (Modify(prev), Modify(new)) => *prev = new, - // equivalent to `Modify` - (change @ Delete, Create(new)) => { - *change = Modify(new); - } - // shouldn't occur, but collapse into `Create` - (change @ Delete, Modify(new)) => { - stdx::never!(); - *change = Create(new); - } - // shouldn't occur, but keep the Create - (prev @ Modify(_), new @ Create(_)) => *prev = new, - } - } - Entry::Vacant(v) => _ = v.insert(changed_file), - } - } let mut workspace_structure_change = None; // A file was added or deleted let mut has_structure_changes = false; let mut bytes = vec![]; let mut modified_rust_files = vec![]; - for file in file_changes.into_values() { + for file in changed_files.into_values() { let vfs_path = vfs.file_path(file.file_id); if let Some(path) = vfs_path.as_path() { has_structure_changes |= file.is_created_or_deleted(); @@ -326,16 +294,17 @@ impl GlobalState { self.diagnostics.clear_native_for(file.file_id); } - let text = if let vfs::Change::Create(v) | vfs::Change::Modify(v) = file.change { - String::from_utf8(v).ok().map(|text| { - // FIXME: Consider doing normalization in the `vfs` instead? That allows - // getting rid of some locking - let (text, line_endings) = LineEndings::normalize(text); - (text, line_endings) - }) - } else { - None - }; + let text = + if let vfs::Change::Create(v, _) | vfs::Change::Modify(v, _) = file.change { + String::from_utf8(v).ok().map(|text| { + // FIXME: Consider doing normalization in the `vfs` instead? That allows + // getting rid of some locking + let (text, line_endings) = LineEndings::normalize(text); + (text, line_endings) + }) + } else { + None + }; // delay `line_endings_map` changes until we are done normalizing the text // this allows delaying the re-acquisition of the write lock bytes.push((file.file_id, text)); diff --git a/crates/vfs/src/lib.rs b/crates/vfs/src/lib.rs index 5be69d87c8..b07e97cd6c 100644 --- a/crates/vfs/src/lib.rs +++ b/crates/vfs/src/lib.rs @@ -46,7 +46,7 @@ pub mod loader; mod path_interner; mod vfs_path; -use std::{fmt, mem}; +use std::{fmt, hash::BuildHasherDefault, mem}; use crate::path_interner::PathInterner; @@ -54,6 +54,7 @@ pub use crate::{ anchored_path::{AnchoredPath, AnchoredPathBuf}, vfs_path::VfsPath, }; +use indexmap::{map::Entry, IndexMap}; pub use paths::{AbsPath, AbsPathBuf}; use rustc_hash::FxHasher; @@ -95,19 +96,12 @@ impl nohash_hasher::IsEnabled for FileId {} pub struct Vfs { interner: PathInterner, data: Vec, - // FIXME: This should be a HashMap - // right now we do a nasty deduplication in GlobalState::process_changes that would be a lot - // easier to handle here on insertion. - changes: Vec, - // The above FIXME would then also get rid of this probably - created_this_cycle: Vec, + changes: IndexMap>, } #[derive(Copy, Clone, Debug, PartialEq, PartialOrd)] pub enum FileState { - /// The file has been created this cycle. - Created, - /// The file exists. + /// The file exists with the given content hash. Exists(u64), /// The file is deleted. Deleted, @@ -131,12 +125,12 @@ impl ChangedFile { /// Returns `true` if the change is [`Create`](ChangeKind::Create) or /// [`Delete`](Change::Delete). pub fn is_created_or_deleted(&self) -> bool { - matches!(self.change, Change::Create(_) | Change::Delete) + matches!(self.change, Change::Create(_, _) | Change::Delete) } /// Returns `true` if the change is [`Create`](ChangeKind::Create). pub fn is_created(&self) -> bool { - matches!(self.change, Change::Create(_)) + matches!(self.change, Change::Create(_, _)) } /// Returns `true` if the change is [`Modify`](ChangeKind::Modify). @@ -146,7 +140,7 @@ impl ChangedFile { pub fn kind(&self) -> ChangeKind { match self.change { - Change::Create(_) => ChangeKind::Create, + Change::Create(_, _) => ChangeKind::Create, Change::Modify(_, _) => ChangeKind::Modify, Change::Delete => ChangeKind::Delete, } @@ -157,7 +151,7 @@ impl ChangedFile { #[derive(Eq, PartialEq, Debug)] pub enum Change { /// The file was (re-)created - Create(Vec), + Create(Vec, u64), /// The file was modified Modify(Vec, u64), /// The file was deleted @@ -178,9 +172,7 @@ pub enum ChangeKind { impl Vfs { /// Id of the given path if it exists in the `Vfs` and is not deleted. pub fn file_id(&self, path: &VfsPath) -> Option { - self.interner - .get(path) - .filter(|&it| matches!(self.get(it), FileState::Exists(_) | FileState::Created)) + self.interner.get(path).filter(|&it| matches!(self.get(it), FileState::Exists(_))) } /// File path corresponding to the given `file_id`. @@ -198,9 +190,7 @@ impl Vfs { pub fn iter(&self) -> impl Iterator + '_ { (0..self.data.len()) .map(|it| FileId(it as u32)) - .filter(move |&file_id| { - matches!(self.get(file_id), FileState::Exists(_) | FileState::Created) - }) + .filter(move |&file_id| matches!(self.get(file_id), FileState::Exists(_))) .map(move |file_id| { let path = self.interner.lookup(file_id); (file_id, path) @@ -219,12 +209,11 @@ impl Vfs { let state = self.get(file_id); let change_kind = match (state, contents) { (FileState::Deleted, None) => return false, - (FileState::Deleted, Some(v)) => Change::Create(v), - (FileState::Exists(_) | FileState::Created, None) => Change::Delete, - (FileState::Created, Some(v)) => { + (FileState::Deleted, Some(v)) => { let hash = hash_once::(&*v); - Change::Modify(v, hash) + Change::Create(v, hash) } + (FileState::Exists(_), None) => Change::Delete, (FileState::Exists(hash), Some(v)) => { let new_hash = hash_once::(&*v); if new_hash == hash { @@ -233,37 +222,61 @@ impl Vfs { Change::Modify(v, new_hash) } }; - self.data[file_id.0 as usize] = match change_kind { - Change::Create(_) => { - self.created_this_cycle.push(file_id); - FileState::Created - } - // If the file got created this cycle, make sure we keep it that way even - // if a modify comes in - Change::Modify(_, _) if matches!(state, FileState::Created) => FileState::Created, - Change::Modify(_, hash) => FileState::Exists(hash), - Change::Delete => FileState::Deleted, + + let mut set_data = |change_kind| { + self.data[file_id.0 as usize] = match change_kind { + &Change::Create(_, hash) | &Change::Modify(_, hash) => FileState::Exists(hash), + Change::Delete => FileState::Deleted, + }; }; + let changed_file = ChangedFile { file_id, change: change_kind }; - self.changes.push(changed_file); + match self.changes.entry(file_id) { + // two changes to the same file in one cycle, merge them appropriately + Entry::Occupied(mut o) => { + use Change::*; + + match (&mut o.get_mut().change, changed_file.change) { + // newer `Delete` wins + (change, Delete) => *change = Delete, + // merge `Create` with `Create` or `Modify` + (Create(prev, old_hash), Create(new, new_hash) | Modify(new, new_hash)) => { + *prev = new; + *old_hash = new_hash; + } + // collapse identical `Modify`es + (Modify(prev, old_hash), Modify(new, new_hash)) => { + *prev = new; + *old_hash = new_hash; + } + // equivalent to `Modify` + (change @ Delete, Create(new, new_hash)) => { + *change = Modify(new, new_hash); + } + // shouldn't occur, but collapse into `Create` + (change @ Delete, Modify(new, new_hash)) => { + stdx::never!(); + *change = Create(new, new_hash); + } + // shouldn't occur, but keep the Create + (prev @ Modify(_, _), new @ Create(_, _)) => *prev = new, + } + set_data(&o.get().change); + } + Entry::Vacant(v) => set_data(&v.insert(changed_file).change), + }; + true } /// Drain and returns all the changes in the `Vfs`. - pub fn take_changes(&mut self) -> Vec { - let _p = span!(Level::INFO, "Vfs::take_changes").entered(); - for file_id in self.created_this_cycle.drain(..) { - if self.data[file_id.0 as usize] == FileState::Created { - // downgrade the file from `Created` to `Exists` as the cycle is done - self.data[file_id.0 as usize] = FileState::Exists(todo!()); - } - } + pub fn take_changes(&mut self) -> IndexMap> { mem::take(&mut self.changes) } /// Provides a panic-less way to verify file_id validity. pub fn exists(&self, file_id: FileId) -> bool { - matches!(self.get(file_id), FileState::Exists(_) | FileState::Created) + matches!(self.get(file_id), FileState::Exists(_)) } /// Returns the id associated with `path`