From a7fcef21feb4d835d1fee83b3f93b4aef86d5545 Mon Sep 17 00:00:00 2001 From: 0xPoe Date: Wed, 23 Jul 2025 22:09:13 +0200 Subject: [PATCH] refactor: move `IndexPackage` and `RegistryDependency` to `cargo-util-schemas` for better modularity Signed-off-by: 0xPoe --- crates/cargo-util-schemas/Cargo.toml | 1 + crates/cargo-util-schemas/src/index.rs | 145 +++++++++++ crates/cargo-util-schemas/src/lib.rs | 1 + src/cargo/ops/cargo_package/mod.rs | 2 +- src/cargo/sources/registry/index/mod.rs | 327 +++++++----------------- src/cargo/sources/registry/mod.rs | 5 +- 6 files changed, 243 insertions(+), 238 deletions(-) create mode 100644 crates/cargo-util-schemas/src/index.rs diff --git a/crates/cargo-util-schemas/Cargo.toml b/crates/cargo-util-schemas/Cargo.toml index 2682b65e1..da4699f98 100644 --- a/crates/cargo-util-schemas/Cargo.toml +++ b/crates/cargo-util-schemas/Cargo.toml @@ -25,6 +25,7 @@ workspace = true [dev-dependencies] snapbox.workspace = true +serde_json.workspace = true [features] unstable-schema = ["dep:schemars", "dep:serde_json"] diff --git a/crates/cargo-util-schemas/src/index.rs b/crates/cargo-util-schemas/src/index.rs new file mode 100644 index 000000000..489d110b2 --- /dev/null +++ b/crates/cargo-util-schemas/src/index.rs @@ -0,0 +1,145 @@ +use crate::manifest::RustVersion; +use semver::Version; +use serde::{Deserialize, Serialize}; +use std::{borrow::Cow, collections::BTreeMap}; + +/// A single line in the index representing a single version of a package. +#[derive(Deserialize, Serialize)] +pub struct IndexPackage<'a> { + /// Name of the package. + #[serde(borrow)] + pub name: Cow<'a, str>, + /// The version of this dependency. + pub vers: Version, + /// All kinds of direct dependencies of the package, including dev and + /// build dependencies. + #[serde(borrow)] + pub deps: Vec>, + /// Set of features defined for the package, i.e., `[features]` table. + #[serde(default)] + pub features: BTreeMap, Vec>>, + /// This field contains features with new, extended syntax. Specifically, + /// namespaced features (`dep:`) and weak dependencies (`pkg?/feat`). + /// + /// This is separated from `features` because versions older than 1.19 + /// will fail to load due to not being able to parse the new syntax, even + /// with a `Cargo.lock` file. + pub features2: Option, Vec>>>, + /// Checksum for verifying the integrity of the corresponding downloaded package. + pub cksum: String, + /// If `true`, Cargo will skip this version when resolving. + /// + /// This was added in 2014. Everything in the crates.io index has this set + /// now, so this probably doesn't need to be an option anymore. + pub yanked: Option, + /// Native library name this package links to. + /// + /// Added early 2018 (see ), + /// can be `None` if published before then. + pub links: Option>, + /// Required version of rust + /// + /// Corresponds to `package.rust-version`. + /// + /// Added in 2023 (see ), + /// can be `None` if published before then or if not set in the manifest. + pub rust_version: Option, + /// The schema version for this entry. + /// + /// If this is None, it defaults to version `1`. Entries with unknown + /// versions are ignored. + /// + /// Version `2` schema adds the `features2` field. + /// + /// Version `3` schema adds `artifact`, `bindep_targes`, and `lib` for + /// artifact dependencies support. + /// + /// This provides a method to safely introduce changes to index entries + /// and allow older versions of cargo to ignore newer entries it doesn't + /// understand. This is honored as of 1.51, so unfortunately older + /// versions will ignore it, and potentially misinterpret version 2 and + /// newer entries. + /// + /// The intent is that versions older than 1.51 will work with a + /// pre-existing `Cargo.lock`, but they may not correctly process `cargo + /// update` or build a lock from scratch. In that case, cargo may + /// incorrectly select a new package that uses a new index schema. A + /// workaround is to downgrade any packages that are incompatible with the + /// `--precise` flag of `cargo update`. + pub v: Option, +} + +/// A dependency as encoded in the [`IndexPackage`] index JSON. +#[derive(Deserialize, Serialize, Clone)] +pub struct RegistryDependency<'a> { + /// Name of the dependency. If the dependency is renamed, the original + /// would be stored in [`RegistryDependency::package`]. + #[serde(borrow)] + pub name: Cow<'a, str>, + /// The SemVer requirement for this dependency. + #[serde(borrow)] + pub req: Cow<'a, str>, + /// Set of features enabled for this dependency. + #[serde(default)] + pub features: Vec>, + /// Whether or not this is an optional dependency. + #[serde(default)] + pub optional: bool, + /// Whether or not default features are enabled. + #[serde(default = "default_true")] + pub default_features: bool, + /// The target platform for this dependency. + pub target: Option>, + /// The dependency kind. "dev", "build", and "normal". + pub kind: Option>, + // The URL of the index of the registry where this dependency is from. + // `None` if it is from the same index. + pub registry: Option>, + /// The original name if the dependency is renamed. + pub package: Option>, + /// Whether or not this is a public dependency. Unstable. See [RFC 1977]. + /// + /// [RFC 1977]: https://rust-lang.github.io/rfcs/1977-public-private-dependencies.html + pub public: Option, + pub artifact: Option>>, + pub bindep_target: Option>, + #[serde(default)] + pub lib: bool, +} + +fn default_true() -> bool { + true +} + +#[test] +fn escaped_char_in_index_json_blob() { + let _: IndexPackage<'_> = serde_json::from_str( + r#"{"name":"a","vers":"0.0.1","deps":[],"cksum":"bae3","features":{}}"#, + ) + .unwrap(); + let _: IndexPackage<'_> = serde_json::from_str( + r#"{"name":"a","vers":"0.0.1","deps":[],"cksum":"bae3","features":{"test":["k","q"]},"links":"a-sys"}"# + ).unwrap(); + + // Now we add escaped cher all the places they can go + // these are not valid, but it should error later than json parsing + let _: IndexPackage<'_> = serde_json::from_str( + r#"{ + "name":"This name has a escaped cher in it \n\t\" ", + "vers":"0.0.1", + "deps":[{ + "name": " \n\t\" ", + "req": " \n\t\" ", + "features": [" \n\t\" "], + "optional": true, + "default_features": true, + "target": " \n\t\" ", + "kind": " \n\t\" ", + "registry": " \n\t\" " + }], + "cksum":"bae3", + "features":{"test \n\t\" ":["k \n\t\" ","q \n\t\" "]}, + "links":" \n\t\" "}"#, + ) + .unwrap(); +} diff --git a/crates/cargo-util-schemas/src/lib.rs b/crates/cargo-util-schemas/src/lib.rs index a850c894a..9324480f1 100644 --- a/crates/cargo-util-schemas/src/lib.rs +++ b/crates/cargo-util-schemas/src/lib.rs @@ -9,6 +9,7 @@ //! > ecosystem. This crate follows semver compatibility for its APIs. pub mod core; +pub mod index; pub mod manifest; pub mod messages; #[cfg(feature = "unstable-schema")] diff --git a/src/cargo/ops/cargo_package/mod.rs b/src/cargo/ops/cargo_package/mod.rs index 3aa0babbd..3cc656dc0 100644 --- a/src/cargo/ops/cargo_package/mod.rs +++ b/src/cargo/ops/cargo_package/mod.rs @@ -19,7 +19,6 @@ use crate::core::{Package, PackageId, PackageSet, Resolve, SourceId}; use crate::ops::lockfile::LOCKFILE_NAME; use crate::ops::registry::{RegistryOrIndex, infer_registry}; use crate::sources::path::PathEntry; -use crate::sources::registry::index::{IndexPackage, RegistryDependency}; use crate::sources::{CRATES_IO_REGISTRY, PathSource}; use crate::util::FileLock; use crate::util::Filesystem; @@ -35,6 +34,7 @@ use crate::util::toml::prepare_for_publish; use crate::{drop_println, ops}; use anyhow::{Context as _, bail}; use cargo_util::paths; +use cargo_util_schemas::index::{IndexPackage, RegistryDependency}; use cargo_util_schemas::messages; use flate2::{Compression, GzBuilder}; use tar::{Builder, EntryType, Header, HeaderMode}; diff --git a/src/cargo/sources/registry/index/mod.rs b/src/cargo/sources/registry/index/mod.rs index f09fa7780..f2cd5ae39 100644 --- a/src/cargo/sources/registry/index/mod.rs +++ b/src/cargo/sources/registry/index/mod.rs @@ -28,6 +28,7 @@ use crate::util::IntoUrl; use crate::util::interning::InternedString; use crate::util::{CargoResult, Filesystem, GlobalContext, OptVersionReq, internal}; use cargo_util::registry::make_dep_path; +use cargo_util_schemas::index::{IndexPackage, RegistryDependency}; use cargo_util_schemas::manifest::RustVersion; use semver::Version; use serde::{Deserialize, Serialize}; @@ -194,97 +195,29 @@ impl IndexSummary { } } -/// A single line in the index representing a single version of a package. -#[derive(Deserialize, Serialize)] -pub struct IndexPackage<'a> { - /// Name of the package. - #[serde(borrow)] - pub name: Cow<'a, str>, - /// The version of this dependency. - pub vers: Version, - /// All kinds of direct dependencies of the package, including dev and - /// build dependencies. - #[serde(borrow)] - pub deps: Vec>, - /// Set of features defined for the package, i.e., `[features]` table. - #[serde(default)] - pub features: BTreeMap, Vec>>, - /// This field contains features with new, extended syntax. Specifically, - /// namespaced features (`dep:`) and weak dependencies (`pkg?/feat`). - /// - /// This is separated from `features` because versions older than 1.19 - /// will fail to load due to not being able to parse the new syntax, even - /// with a `Cargo.lock` file. - pub features2: Option, Vec>>>, - /// Checksum for verifying the integrity of the corresponding downloaded package. - pub cksum: String, - /// If `true`, Cargo will skip this version when resolving. - /// - /// This was added in 2014. Everything in the crates.io index has this set - /// now, so this probably doesn't need to be an option anymore. - pub yanked: Option, - /// Native library name this package links to. - /// - /// Added early 2018 (see ), - /// can be `None` if published before then. - pub links: Option>, - /// Required version of rust - /// - /// Corresponds to `package.rust-version`. - /// - /// Added in 2023 (see ), - /// can be `None` if published before then or if not set in the manifest. - pub rust_version: Option, - /// The schema version for this entry. - /// - /// If this is None, it defaults to version `1`. Entries with unknown - /// versions are ignored. - /// - /// Version `2` schema adds the `features2` field. - /// - /// Version `3` schema adds `artifact`, `bindep_targes`, and `lib` for - /// artifact dependencies support. - /// - /// This provides a method to safely introduce changes to index entries - /// and allow older versions of cargo to ignore newer entries it doesn't - /// understand. This is honored as of 1.51, so unfortunately older - /// versions will ignore it, and potentially misinterpret version 2 and - /// newer entries. - /// - /// The intent is that versions older than 1.51 will work with a - /// pre-existing `Cargo.lock`, but they may not correctly process `cargo - /// update` or build a lock from scratch. In that case, cargo may - /// incorrectly select a new package that uses a new index schema. A - /// workaround is to downgrade any packages that are incompatible with the - /// `--precise` flag of `cargo update`. - pub v: Option, -} - -impl IndexPackage<'_> { - fn to_summary(&self, source_id: SourceId) -> CargoResult { - // ****CAUTION**** Please be extremely careful with returning errors, see - // `IndexSummary::parse` for details - let pkgid = PackageId::new(self.name.as_ref().into(), self.vers.clone(), source_id); - let deps = self - .deps - .iter() - .map(|dep| dep.clone().into_dep(source_id)) - .collect::>>()?; - let mut features = self.features.clone(); - if let Some(features2) = self.features2.clone() { - for (name, values) in features2 { - features.entry(name).or_default().extend(values); - } +fn index_package_to_summary(pkg: &IndexPackage<'_>, source_id: SourceId) -> CargoResult { + // ****CAUTION**** Please be extremely careful with returning errors, see + // `IndexSummary::parse` for details + let pkgid = PackageId::new(pkg.name.as_ref().into(), pkg.vers.clone(), source_id); + let deps = pkg + .deps + .iter() + .map(|dep| registry_dependency_into_dep(dep.clone(), source_id)) + .collect::>>()?; + let mut features = pkg.features.clone(); + if let Some(features2) = pkg.features2.clone() { + for (name, values) in features2 { + features.entry(name).or_default().extend(values); } - let features = features - .into_iter() - .map(|(name, values)| (name.into(), values.into_iter().map(|v| v.into()).collect())) - .collect::>(); - let links: Option = self.links.as_ref().map(|l| l.as_ref().into()); - let mut summary = Summary::new(pkgid, deps, &features, links, self.rust_version.clone())?; - summary.set_checksum(self.cksum.clone()); - Ok(summary) } + let features = features + .into_iter() + .map(|(name, values)| (name.into(), values.into_iter().map(|v| v.into()).collect())) + .collect::>(); + let links: Option = pkg.links.as_ref().map(|l| l.as_ref().into()); + let mut summary = Summary::new(pkgid, deps, &features, links, pkg.rust_version.clone())?; + summary.set_checksum(pkg.cksum.clone()); + Ok(summary) } #[derive(Deserialize, Serialize)] @@ -303,48 +236,6 @@ struct IndexPackageV { v: Option, } -/// A dependency as encoded in the [`IndexPackage`] index JSON. -#[derive(Deserialize, Serialize, Clone)] -pub struct RegistryDependency<'a> { - /// Name of the dependency. If the dependency is renamed, the original - /// would be stored in [`RegistryDependency::package`]. - #[serde(borrow)] - pub name: Cow<'a, str>, - /// The SemVer requirement for this dependency. - #[serde(borrow)] - pub req: Cow<'a, str>, - /// Set of features enabled for this dependency. - #[serde(default)] - pub features: Vec>, - /// Whether or not this is an optional dependency. - #[serde(default)] - pub optional: bool, - /// Whether or not default features are enabled. - #[serde(default = "default_true")] - pub default_features: bool, - /// The target platform for this dependency. - pub target: Option>, - /// The dependency kind. "dev", "build", and "normal". - pub kind: Option>, - // The URL of the index of the registry where this dependency is from. - // `None` if it is from the same index. - pub registry: Option>, - /// The original name if the dependency is renamed. - pub package: Option>, - /// Whether or not this is a public dependency. Unstable. See [RFC 1977]. - /// - /// [RFC 1977]: https://rust-lang.github.io/rfcs/1977-public-private-dependencies.html - pub public: Option, - pub artifact: Option>>, - pub bindep_target: Option>, - #[serde(default)] - pub lib: bool, -} - -fn default_true() -> bool { - true -} - impl<'gctx> RegistryIndex<'gctx> { /// Creates an empty registry index at `path`. pub fn new( @@ -753,7 +644,7 @@ impl IndexSummary { // values carefully when making changes here. let index_summary = (|| { let index = serde_json::from_slice::>(line)?; - let summary = index.to_summary(source_id)?; + let summary = index_package_to_summary(&index, source_id)?; Ok((index, summary)) })(); let (index, summary, valid) = match index_summary { @@ -784,7 +675,7 @@ impl IndexSummary { yanked: Default::default(), links: Default::default(), }; - let summary = index.to_summary(source_id)?; + let summary = index_package_to_summary(&index, source_id)?; (index, summary, false) } }; @@ -809,77 +700,78 @@ impl IndexSummary { } } -impl<'a> RegistryDependency<'a> { - /// Converts an encoded dependency in the registry to a cargo dependency - pub fn into_dep(self, default: SourceId) -> CargoResult { - let RegistryDependency { - name, - req, - mut features, - optional, - default_features, - target, - kind, - registry, - package, - public, - artifact, - bindep_target, - lib, - } = self; +/// Converts an encoded dependency in the registry to a cargo dependency +fn registry_dependency_into_dep( + dep: RegistryDependency<'_>, + default: SourceId, +) -> CargoResult { + let RegistryDependency { + name, + req, + mut features, + optional, + default_features, + target, + kind, + registry, + package, + public, + artifact, + bindep_target, + lib, + } = dep; - let id = if let Some(registry) = ®istry { - SourceId::for_registry(®istry.into_url()?)? - } else { - default - }; + let id = if let Some(registry) = ®istry { + SourceId::for_registry(®istry.into_url()?)? + } else { + default + }; - let interned_name = InternedString::new(package.as_ref().unwrap_or(&name)); - let mut dep = Dependency::parse(interned_name, Some(&req), id)?; - if package.is_some() { - dep.set_explicit_name_in_toml(name); - } - let kind = match kind.as_deref().unwrap_or("") { - "dev" => DepKind::Development, - "build" => DepKind::Build, - _ => DepKind::Normal, - }; - - let platform = match target { - Some(target) => Some(target.parse()?), - None => None, - }; - - // All dependencies are private by default - let public = public.unwrap_or(false); - - // Unfortunately older versions of cargo and/or the registry ended up - // publishing lots of entries where the features array contained the - // empty feature, "", inside. This confuses the resolution process much - // later on and these features aren't actually valid, so filter them all - // out here. - features.retain(|s| !s.is_empty()); - - // In index, "registry" is null if it is from the same index. - // In Cargo.toml, "registry" is None if it is from the default - if !id.is_crates_io() { - dep.set_registry_id(id); - } - - if let Some(artifacts) = artifact { - let artifact = Artifact::parse(&artifacts, lib, bindep_target.as_deref())?; - dep.set_artifact(artifact); - } - - dep.set_optional(optional) - .set_default_features(default_features) - .set_features(features) - .set_platform(platform) - .set_kind(kind) - .set_public(public); - - Ok(dep) + let interned_name = InternedString::new(package.as_ref().unwrap_or(&name)); + let mut dep = Dependency::parse(interned_name, Some(&req), id)?; + if package.is_some() { + dep.set_explicit_name_in_toml(name); } + let kind = match kind.as_deref().unwrap_or("") { + "dev" => DepKind::Development, + "build" => DepKind::Build, + _ => DepKind::Normal, + }; + + let platform = match target { + Some(target) => Some(target.parse()?), + None => None, + }; + + // All dependencies are private by default + let public = public.unwrap_or(false); + + // Unfortunately older versions of cargo and/or the registry ended up + // publishing lots of entries where the features array contained the + // empty feature, "", inside. This confuses the resolution process much + // later on and these features aren't actually valid, so filter them all + // out here. + features.retain(|s| !s.is_empty()); + + // In index, "registry" is null if it is from the same index. + // In Cargo.toml, "registry" is None if it is from the default + if !id.is_crates_io() { + dep.set_registry_id(id); + } + + if let Some(artifacts) = artifact { + let artifact = Artifact::parse(&artifacts, lib, bindep_target.as_deref())?; + dep.set_artifact(artifact); + } + + dep.set_optional(optional) + .set_default_features(default_features) + .set_features(features) + .set_platform(platform) + .set_kind(kind) + .set_public(public); + + Ok(dep) } /// Like [`slice::split`] but is optimized by [`memchr`]. @@ -907,36 +799,3 @@ fn split(haystack: &[u8], needle: u8) -> impl Iterator { Split { haystack, needle } } - -#[test] -fn escaped_char_in_index_json_blob() { - let _: IndexPackage<'_> = serde_json::from_str( - r#"{"name":"a","vers":"0.0.1","deps":[],"cksum":"bae3","features":{}}"#, - ) - .unwrap(); - let _: IndexPackage<'_> = serde_json::from_str( - r#"{"name":"a","vers":"0.0.1","deps":[],"cksum":"bae3","features":{"test":["k","q"]},"links":"a-sys"}"# - ).unwrap(); - - // Now we add escaped cher all the places they can go - // these are not valid, but it should error later than json parsing - let _: IndexPackage<'_> = serde_json::from_str( - r#"{ - "name":"This name has a escaped cher in it \n\t\" ", - "vers":"0.0.1", - "deps":[{ - "name": " \n\t\" ", - "req": " \n\t\" ", - "features": [" \n\t\" "], - "optional": true, - "default_features": true, - "target": " \n\t\" ", - "kind": " \n\t\" ", - "registry": " \n\t\" " - }], - "cksum":"bae3", - "features":{"test \n\t\" ":["k \n\t\" ","q \n\t\" "]}, - "links":" \n\t\" "}"#, - ) - .unwrap(); -} diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index 4d351f786..69bb2d875 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -73,7 +73,7 @@ //! about the format of the registry: //! //! 1. Each crate will have one file corresponding to it. Each version for a -//! crate will just be a line in this file (see [`IndexPackage`] for its +//! crate will just be a line in this file (see [`cargo_util_schemas::index::IndexPackage`] for its //! representation). //! 2. There will be two tiers of directories for crate names, under which //! crates corresponding to those tiers will be located. @@ -125,7 +125,7 @@ //! //! Each file in the index is the history of one crate over time. Each line in //! the file corresponds to one version of a crate, stored in JSON format (see -//! the [`IndexPackage`] structure). +//! the [`cargo_util_schemas::index::IndexPackage`] structure). //! //! As new versions are published, new lines are appended to this file. **The //! only modifications to this file that should happen over time are yanks of a @@ -181,7 +181,6 @@ //! ... //! ``` //! -//! [`IndexPackage`]: index::IndexPackage use std::collections::HashSet; use std::fs;