Add more profiling traces

Someone suggested I look at
[zed](https://github.com/zed-industries/zed).
It has some interesting performance characteristics compared to projects
I looked at before:
- A lot of workspace members
- git dependencies
- patches

This adds traces that help provide more context for those scenarios when
looking at them.

Some of these areas might be of interest for
optimizing:
- On every invocation, we do a git2 `copy_to` call for each git checkout
- On every invocation, we do a `du` on each git checkout
- It'd be great to avoid parsing every manifest in a git checkout but we
  need to do a full parse to make sure we find all packages (see
  https://rust-lang.zulipchat.com/#narrow/stream/246057-t-cargo/topic/Redundant.20code.20in.20.60GitSouce.60.3F)
- Its suspicious how much time we spend in the "poison" step of
  resolving when its a no-op
This commit is contained in:
Ed Page 2024-07-11 13:18:25 -05:00
parent e55aad081b
commit 89eb406e47
6 changed files with 101 additions and 82 deletions

View File

@ -1799,6 +1799,7 @@ pub fn is_silent_error(e: &anyhow::Error) -> bool {
} }
/// Returns the disk usage for a git checkout directory. /// Returns the disk usage for a git checkout directory.
#[tracing::instrument]
pub fn du_git_checkout(path: &Path) -> CargoResult<u64> { pub fn du_git_checkout(path: &Path) -> CargoResult<u64> {
// !.git is used because clones typically use hardlinks for the git // !.git is used because clones typically use hardlinks for the git
// contents. TODO: Verify behavior on Windows. // contents. TODO: Verify behavior on Windows.

View File

@ -336,6 +336,7 @@ impl<'gctx> PackageRegistry<'gctx> {
/// The return value is a `Vec` of patches that should *not* be locked. /// The return value is a `Vec` of patches that should *not* be locked.
/// This happens when the patch is locked, but the patch has been updated /// This happens when the patch is locked, but the patch has been updated
/// so the locked value is no longer correct. /// so the locked value is no longer correct.
#[tracing::instrument(skip(self, patch_deps))]
pub fn patch( pub fn patch(
&mut self, &mut self,
url: &Url, url: &Url,
@ -784,6 +785,7 @@ impl<'gctx> Registry for PackageRegistry<'gctx> {
} }
} }
#[tracing::instrument(skip_all)]
fn block_until_ready(&mut self) -> CargoResult<()> { fn block_until_ready(&mut self) -> CargoResult<()> {
if cfg!(debug_assertions) { if cfg!(debug_assertions) {
// Force borrow to catch invalid borrows, regardless of which source is used and how it // Force borrow to catch invalid borrows, regardless of which source is used and how it

View File

@ -723,6 +723,7 @@ impl<'gctx> Workspace<'gctx> {
/// verifies that those are all valid packages to point to. Otherwise, this /// verifies that those are all valid packages to point to. Otherwise, this
/// will transitively follow all `path` dependencies looking for members of /// will transitively follow all `path` dependencies looking for members of
/// the workspace. /// the workspace.
#[tracing::instrument(skip_all)]
fn find_members(&mut self) -> CargoResult<()> { fn find_members(&mut self) -> CargoResult<()> {
let Some(workspace_config) = self.load_workspace_config()? else { let Some(workspace_config) = self.load_workspace_config()? else {
debug!("find_members - only me as a member"); debug!("find_members - only me as a member");
@ -886,6 +887,7 @@ impl<'gctx> Workspace<'gctx> {
/// 1. A workspace only has one root. /// 1. A workspace only has one root.
/// 2. All workspace members agree on this one root as the root. /// 2. All workspace members agree on this one root as the root.
/// 3. The current crate is a member of this workspace. /// 3. The current crate is a member of this workspace.
#[tracing::instrument(skip_all)]
fn validate(&mut self) -> CargoResult<()> { fn validate(&mut self) -> CargoResult<()> {
// The rest of the checks require a VirtualManifest or multiple members. // The rest of the checks require a VirtualManifest or multiple members.
if self.root_manifest.is_none() { if self.root_manifest.is_none() {
@ -954,6 +956,7 @@ impl<'gctx> Workspace<'gctx> {
} }
} }
#[tracing::instrument(skip_all)]
fn validate_members(&mut self) -> CargoResult<()> { fn validate_members(&mut self) -> CargoResult<()> {
for member in self.members.clone() { for member in self.members.clone() {
let root = self.find_root(&member)?; let root = self.find_root(&member)?;
@ -1812,6 +1815,7 @@ impl WorkspaceRootConfig {
self.members.is_some() self.members.is_some()
} }
#[tracing::instrument(skip_all)]
fn members_paths(&self, globs: &[String]) -> CargoResult<Vec<PathBuf>> { fn members_paths(&self, globs: &[String]) -> CargoResult<Vec<PathBuf>> {
let mut expanded_list = Vec::new(); let mut expanded_list = Vec::new();

View File

@ -34,6 +34,7 @@ pub fn read_package(
Ok(Package::new(manifest, path)) Ok(Package::new(manifest, path))
} }
#[tracing::instrument(skip(source_id, gctx))]
pub fn read_packages( pub fn read_packages(
path: &Path, path: &Path,
source_id: SourceId, source_id: SourceId,

View File

@ -387,6 +387,7 @@ pub fn resolve_with_previous<'gctx>(
register_previous_locks(ws, registry, r, &keep, dev_deps); register_previous_locks(ws, registry, r, &keep, dev_deps);
// Prefer to use anything in the previous lock file, aka we want to have conservative updates. // Prefer to use anything in the previous lock file, aka we want to have conservative updates.
let _span = tracing::span!(tracing::Level::TRACE, "prefer_package_id").entered();
for id in r.iter().filter(keep) { for id in r.iter().filter(keep) {
debug!("attempting to prefer {}", id); debug!("attempting to prefer {}", id);
version_prefs.prefer_package_id(id); version_prefs.prefer_package_id(id);
@ -397,20 +398,22 @@ pub fn resolve_with_previous<'gctx>(
registry.lock_patches(); registry.lock_patches();
} }
let summaries: Vec<(Summary, ResolveOpts)> = ws let summaries: Vec<(Summary, ResolveOpts)> = {
.members_with_features(specs, cli_features)? let _span = tracing::span!(tracing::Level::TRACE, "registry.lock").entered();
.into_iter() ws.members_with_features(specs, cli_features)?
.map(|(member, features)| { .into_iter()
let summary = registry.lock(member.summary().clone()); .map(|(member, features)| {
( let summary = registry.lock(member.summary().clone());
summary, (
ResolveOpts { summary,
dev_deps, ResolveOpts {
features: RequestedFeatures::CliFeatures(features), dev_deps,
}, features: RequestedFeatures::CliFeatures(features),
) },
}) )
.collect(); })
.collect()
};
let replace = lock_replacements(ws, previous, &keep); let replace = lock_replacements(ws, previous, &keep);
@ -498,6 +501,7 @@ pub fn get_resolved_packages<'gctx>(
/// ///
/// Note that this function, at the time of this writing, is basically the /// Note that this function, at the time of this writing, is basically the
/// entire fix for issue #4127. /// entire fix for issue #4127.
#[tracing::instrument(skip_all)]
fn register_previous_locks( fn register_previous_locks(
ws: &Workspace<'_>, ws: &Workspace<'_>,
registry: &mut PackageRegistry<'_>, registry: &mut PackageRegistry<'_>,
@ -578,60 +582,63 @@ fn register_previous_locks(
// crates from crates.io* are not locked (aka added to `avoid_locking`). // crates from crates.io* are not locked (aka added to `avoid_locking`).
// For dependencies like `log` their previous version in the lock file will // For dependencies like `log` their previous version in the lock file will
// come up first before newer version, if newer version are available. // come up first before newer version, if newer version are available.
let mut path_deps = ws.members().cloned().collect::<Vec<_>>(); {
let mut visited = HashSet::new(); let _span = tracing::span!(tracing::Level::TRACE, "poison").entered();
while let Some(member) = path_deps.pop() { let mut path_deps = ws.members().cloned().collect::<Vec<_>>();
if !visited.insert(member.package_id()) { let mut visited = HashSet::new();
continue; while let Some(member) = path_deps.pop() {
} if !visited.insert(member.package_id()) {
let is_ws_member = ws.is_member(&member);
for dep in member.dependencies() {
// If this dependency didn't match anything special then we may want
// to poison the source as it may have been added. If this path
// dependencies is **not** a workspace member, however, and it's an
// optional/non-transitive dependency then it won't be necessarily
// be in our lock file. If this shows up then we avoid poisoning
// this source as otherwise we'd repeatedly update the registry.
//
// TODO: this breaks adding an optional dependency in a
// non-workspace member and then simultaneously editing the
// dependency on that crate to enable the feature. For now,
// this bug is better than the always-updating registry though.
if !is_ws_member && (dep.is_optional() || !dep.is_transitive()) {
continue; continue;
} }
let is_ws_member = ws.is_member(&member);
for dep in member.dependencies() {
// If this dependency didn't match anything special then we may want
// to poison the source as it may have been added. If this path
// dependencies is **not** a workspace member, however, and it's an
// optional/non-transitive dependency then it won't be necessarily
// be in our lock file. If this shows up then we avoid poisoning
// this source as otherwise we'd repeatedly update the registry.
//
// TODO: this breaks adding an optional dependency in a
// non-workspace member and then simultaneously editing the
// dependency on that crate to enable the feature. For now,
// this bug is better than the always-updating registry though.
if !is_ws_member && (dep.is_optional() || !dep.is_transitive()) {
continue;
}
// If dev-dependencies aren't being resolved, skip them. // If dev-dependencies aren't being resolved, skip them.
if !dep.is_transitive() && !dev_deps { if !dep.is_transitive() && !dev_deps {
continue; continue;
} }
// If this is a path dependency, then try to push it onto our // If this is a path dependency, then try to push it onto our
// worklist. // worklist.
if let Some(pkg) = path_pkg(dep.source_id()) { if let Some(pkg) = path_pkg(dep.source_id()) {
path_deps.push(pkg); path_deps.push(pkg);
continue; continue;
} }
// If we match *anything* in the dependency graph then we consider // If we match *anything* in the dependency graph then we consider
// ourselves all ok, and assume that we'll resolve to that. // ourselves all ok, and assume that we'll resolve to that.
if resolve.iter().any(|id| dep.matches_ignoring_source(id)) { if resolve.iter().any(|id| dep.matches_ignoring_source(id)) {
continue; continue;
} }
// Ok if nothing matches, then we poison the source of these // Ok if nothing matches, then we poison the source of these
// dependencies and the previous lock file. // dependencies and the previous lock file.
debug!( debug!(
"poisoning {} because {} looks like it changed {}", "poisoning {} because {} looks like it changed {}",
dep.source_id(), dep.source_id(),
member.package_id(), member.package_id(),
dep.package_name() dep.package_name()
); );
for id in resolve for id in resolve
.iter() .iter()
.filter(|id| id.source_id() == dep.source_id()) .filter(|id| id.source_id() == dep.source_id())
{ {
add_deps(resolve, id, &mut avoid_locking); add_deps(resolve, id, &mut avoid_locking);
}
} }
} }
} }
@ -661,28 +668,31 @@ fn register_previous_locks(
let keep = |id: &PackageId| keep(id) && !avoid_locking.contains(id); let keep = |id: &PackageId| keep(id) && !avoid_locking.contains(id);
registry.clear_lock(); registry.clear_lock();
for node in resolve.iter().filter(keep) { {
let deps = resolve let _span = tracing::span!(tracing::Level::TRACE, "register_lock").entered();
.deps_not_replaced(node) for node in resolve.iter().filter(keep) {
.map(|p| p.0) let deps = resolve
.filter(keep) .deps_not_replaced(node)
.collect::<Vec<_>>(); .map(|p| p.0)
.filter(keep)
.collect::<Vec<_>>();
// In the v2 lockfile format and prior the `branch=master` dependency // In the v2 lockfile format and prior the `branch=master` dependency
// directive was serialized the same way as the no-branch-listed // directive was serialized the same way as the no-branch-listed
// directive. Nowadays in Cargo, however, these two directives are // directive. Nowadays in Cargo, however, these two directives are
// considered distinct and are no longer represented the same way. To // considered distinct and are no longer represented the same way. To
// maintain compatibility with older lock files we register locked nodes // maintain compatibility with older lock files we register locked nodes
// for *both* the master branch and the default branch. // for *both* the master branch and the default branch.
// //
// Note that this is only applicable for loading older resolves now at // Note that this is only applicable for loading older resolves now at
// this point. All new lock files are encoded as v3-or-later, so this is // this point. All new lock files are encoded as v3-or-later, so this is
// just compat for loading an old lock file successfully. // just compat for loading an old lock file successfully.
if let Some(node) = master_branch_git_source(node, resolve) { if let Some(node) = master_branch_git_source(node, resolve) {
registry.register_lock(node, deps.clone()); registry.register_lock(node, deps.clone());
}
registry.register_lock(node, deps);
} }
registry.register_lock(node, deps);
} }
/// Recursively add `node` and all its transitive dependencies to `set`. /// Recursively add `node` and all its transitive dependencies to `set`.

View File

@ -164,6 +164,7 @@ impl GitRemote {
impl GitDatabase { impl GitDatabase {
/// Checkouts to a revision at `dest`ination from this database. /// Checkouts to a revision at `dest`ination from this database.
#[tracing::instrument(skip(self, gctx))]
pub fn copy_to( pub fn copy_to(
&self, &self,
rev: git2::Oid, rev: git2::Oid,