refactor: move IndexPackage and RegistryDependency to cargo-util-schemas for better modularity

Signed-off-by: 0xPoe <techregister@pm.me>
This commit is contained in:
0xPoe 2025-07-23 22:09:13 +02:00
parent dfcf4c2cbd
commit a7fcef21fe
6 changed files with 243 additions and 238 deletions

View File

@ -25,6 +25,7 @@ workspace = true
[dev-dependencies]
snapbox.workspace = true
serde_json.workspace = true
[features]
unstable-schema = ["dep:schemars", "dep:serde_json"]

View File

@ -0,0 +1,145 @@
use crate::manifest::RustVersion;
use semver::Version;
use serde::{Deserialize, Serialize};
use std::{borrow::Cow, collections::BTreeMap};
/// A single line in the index representing a single version of a package.
#[derive(Deserialize, Serialize)]
pub struct IndexPackage<'a> {
/// Name of the package.
#[serde(borrow)]
pub name: Cow<'a, str>,
/// The version of this dependency.
pub vers: Version,
/// All kinds of direct dependencies of the package, including dev and
/// build dependencies.
#[serde(borrow)]
pub deps: Vec<RegistryDependency<'a>>,
/// Set of features defined for the package, i.e., `[features]` table.
#[serde(default)]
pub features: BTreeMap<Cow<'a, str>, Vec<Cow<'a, str>>>,
/// This field contains features with new, extended syntax. Specifically,
/// namespaced features (`dep:`) and weak dependencies (`pkg?/feat`).
///
/// This is separated from `features` because versions older than 1.19
/// will fail to load due to not being able to parse the new syntax, even
/// with a `Cargo.lock` file.
pub features2: Option<BTreeMap<Cow<'a, str>, Vec<Cow<'a, str>>>>,
/// Checksum for verifying the integrity of the corresponding downloaded package.
pub cksum: String,
/// If `true`, Cargo will skip this version when resolving.
///
/// This was added in 2014. Everything in the crates.io index has this set
/// now, so this probably doesn't need to be an option anymore.
pub yanked: Option<bool>,
/// Native library name this package links to.
///
/// Added early 2018 (see <https://github.com/rust-lang/cargo/pull/4978>),
/// can be `None` if published before then.
pub links: Option<Cow<'a, str>>,
/// Required version of rust
///
/// Corresponds to `package.rust-version`.
///
/// Added in 2023 (see <https://github.com/rust-lang/crates.io/pull/6267>),
/// can be `None` if published before then or if not set in the manifest.
pub rust_version: Option<RustVersion>,
/// The schema version for this entry.
///
/// If this is None, it defaults to version `1`. Entries with unknown
/// versions are ignored.
///
/// Version `2` schema adds the `features2` field.
///
/// Version `3` schema adds `artifact`, `bindep_targes`, and `lib` for
/// artifact dependencies support.
///
/// This provides a method to safely introduce changes to index entries
/// and allow older versions of cargo to ignore newer entries it doesn't
/// understand. This is honored as of 1.51, so unfortunately older
/// versions will ignore it, and potentially misinterpret version 2 and
/// newer entries.
///
/// The intent is that versions older than 1.51 will work with a
/// pre-existing `Cargo.lock`, but they may not correctly process `cargo
/// update` or build a lock from scratch. In that case, cargo may
/// incorrectly select a new package that uses a new index schema. A
/// workaround is to downgrade any packages that are incompatible with the
/// `--precise` flag of `cargo update`.
pub v: Option<u32>,
}
/// A dependency as encoded in the [`IndexPackage`] index JSON.
#[derive(Deserialize, Serialize, Clone)]
pub struct RegistryDependency<'a> {
/// Name of the dependency. If the dependency is renamed, the original
/// would be stored in [`RegistryDependency::package`].
#[serde(borrow)]
pub name: Cow<'a, str>,
/// The SemVer requirement for this dependency.
#[serde(borrow)]
pub req: Cow<'a, str>,
/// Set of features enabled for this dependency.
#[serde(default)]
pub features: Vec<Cow<'a, str>>,
/// Whether or not this is an optional dependency.
#[serde(default)]
pub optional: bool,
/// Whether or not default features are enabled.
#[serde(default = "default_true")]
pub default_features: bool,
/// The target platform for this dependency.
pub target: Option<Cow<'a, str>>,
/// The dependency kind. "dev", "build", and "normal".
pub kind: Option<Cow<'a, str>>,
// The URL of the index of the registry where this dependency is from.
// `None` if it is from the same index.
pub registry: Option<Cow<'a, str>>,
/// The original name if the dependency is renamed.
pub package: Option<Cow<'a, str>>,
/// Whether or not this is a public dependency. Unstable. See [RFC 1977].
///
/// [RFC 1977]: https://rust-lang.github.io/rfcs/1977-public-private-dependencies.html
pub public: Option<bool>,
pub artifact: Option<Vec<Cow<'a, str>>>,
pub bindep_target: Option<Cow<'a, str>>,
#[serde(default)]
pub lib: bool,
}
fn default_true() -> bool {
true
}
#[test]
fn escaped_char_in_index_json_blob() {
let _: IndexPackage<'_> = serde_json::from_str(
r#"{"name":"a","vers":"0.0.1","deps":[],"cksum":"bae3","features":{}}"#,
)
.unwrap();
let _: IndexPackage<'_> = serde_json::from_str(
r#"{"name":"a","vers":"0.0.1","deps":[],"cksum":"bae3","features":{"test":["k","q"]},"links":"a-sys"}"#
).unwrap();
// Now we add escaped cher all the places they can go
// these are not valid, but it should error later than json parsing
let _: IndexPackage<'_> = serde_json::from_str(
r#"{
"name":"This name has a escaped cher in it \n\t\" ",
"vers":"0.0.1",
"deps":[{
"name": " \n\t\" ",
"req": " \n\t\" ",
"features": [" \n\t\" "],
"optional": true,
"default_features": true,
"target": " \n\t\" ",
"kind": " \n\t\" ",
"registry": " \n\t\" "
}],
"cksum":"bae3",
"features":{"test \n\t\" ":["k \n\t\" ","q \n\t\" "]},
"links":" \n\t\" "}"#,
)
.unwrap();
}

View File

@ -9,6 +9,7 @@
//! > ecosystem. This crate follows semver compatibility for its APIs.
pub mod core;
pub mod index;
pub mod manifest;
pub mod messages;
#[cfg(feature = "unstable-schema")]

View File

@ -19,7 +19,6 @@ use crate::core::{Package, PackageId, PackageSet, Resolve, SourceId};
use crate::ops::lockfile::LOCKFILE_NAME;
use crate::ops::registry::{RegistryOrIndex, infer_registry};
use crate::sources::path::PathEntry;
use crate::sources::registry::index::{IndexPackage, RegistryDependency};
use crate::sources::{CRATES_IO_REGISTRY, PathSource};
use crate::util::FileLock;
use crate::util::Filesystem;
@ -35,6 +34,7 @@ use crate::util::toml::prepare_for_publish;
use crate::{drop_println, ops};
use anyhow::{Context as _, bail};
use cargo_util::paths;
use cargo_util_schemas::index::{IndexPackage, RegistryDependency};
use cargo_util_schemas::messages;
use flate2::{Compression, GzBuilder};
use tar::{Builder, EntryType, Header, HeaderMode};

View File

@ -28,6 +28,7 @@ use crate::util::IntoUrl;
use crate::util::interning::InternedString;
use crate::util::{CargoResult, Filesystem, GlobalContext, OptVersionReq, internal};
use cargo_util::registry::make_dep_path;
use cargo_util_schemas::index::{IndexPackage, RegistryDependency};
use cargo_util_schemas::manifest::RustVersion;
use semver::Version;
use serde::{Deserialize, Serialize};
@ -194,97 +195,29 @@ impl IndexSummary {
}
}
/// A single line in the index representing a single version of a package.
#[derive(Deserialize, Serialize)]
pub struct IndexPackage<'a> {
/// Name of the package.
#[serde(borrow)]
pub name: Cow<'a, str>,
/// The version of this dependency.
pub vers: Version,
/// All kinds of direct dependencies of the package, including dev and
/// build dependencies.
#[serde(borrow)]
pub deps: Vec<RegistryDependency<'a>>,
/// Set of features defined for the package, i.e., `[features]` table.
#[serde(default)]
pub features: BTreeMap<Cow<'a, str>, Vec<Cow<'a, str>>>,
/// This field contains features with new, extended syntax. Specifically,
/// namespaced features (`dep:`) and weak dependencies (`pkg?/feat`).
///
/// This is separated from `features` because versions older than 1.19
/// will fail to load due to not being able to parse the new syntax, even
/// with a `Cargo.lock` file.
pub features2: Option<BTreeMap<Cow<'a, str>, Vec<Cow<'a, str>>>>,
/// Checksum for verifying the integrity of the corresponding downloaded package.
pub cksum: String,
/// If `true`, Cargo will skip this version when resolving.
///
/// This was added in 2014. Everything in the crates.io index has this set
/// now, so this probably doesn't need to be an option anymore.
pub yanked: Option<bool>,
/// Native library name this package links to.
///
/// Added early 2018 (see <https://github.com/rust-lang/cargo/pull/4978>),
/// can be `None` if published before then.
pub links: Option<Cow<'a, str>>,
/// Required version of rust
///
/// Corresponds to `package.rust-version`.
///
/// Added in 2023 (see <https://github.com/rust-lang/crates.io/pull/6267>),
/// can be `None` if published before then or if not set in the manifest.
pub rust_version: Option<RustVersion>,
/// The schema version for this entry.
///
/// If this is None, it defaults to version `1`. Entries with unknown
/// versions are ignored.
///
/// Version `2` schema adds the `features2` field.
///
/// Version `3` schema adds `artifact`, `bindep_targes`, and `lib` for
/// artifact dependencies support.
///
/// This provides a method to safely introduce changes to index entries
/// and allow older versions of cargo to ignore newer entries it doesn't
/// understand. This is honored as of 1.51, so unfortunately older
/// versions will ignore it, and potentially misinterpret version 2 and
/// newer entries.
///
/// The intent is that versions older than 1.51 will work with a
/// pre-existing `Cargo.lock`, but they may not correctly process `cargo
/// update` or build a lock from scratch. In that case, cargo may
/// incorrectly select a new package that uses a new index schema. A
/// workaround is to downgrade any packages that are incompatible with the
/// `--precise` flag of `cargo update`.
pub v: Option<u32>,
}
impl IndexPackage<'_> {
fn to_summary(&self, source_id: SourceId) -> CargoResult<Summary> {
// ****CAUTION**** Please be extremely careful with returning errors, see
// `IndexSummary::parse` for details
let pkgid = PackageId::new(self.name.as_ref().into(), self.vers.clone(), source_id);
let deps = self
.deps
.iter()
.map(|dep| dep.clone().into_dep(source_id))
.collect::<CargoResult<Vec<_>>>()?;
let mut features = self.features.clone();
if let Some(features2) = self.features2.clone() {
for (name, values) in features2 {
features.entry(name).or_default().extend(values);
}
fn index_package_to_summary(pkg: &IndexPackage<'_>, source_id: SourceId) -> CargoResult<Summary> {
// ****CAUTION**** Please be extremely careful with returning errors, see
// `IndexSummary::parse` for details
let pkgid = PackageId::new(pkg.name.as_ref().into(), pkg.vers.clone(), source_id);
let deps = pkg
.deps
.iter()
.map(|dep| registry_dependency_into_dep(dep.clone(), source_id))
.collect::<CargoResult<Vec<_>>>()?;
let mut features = pkg.features.clone();
if let Some(features2) = pkg.features2.clone() {
for (name, values) in features2 {
features.entry(name).or_default().extend(values);
}
let features = features
.into_iter()
.map(|(name, values)| (name.into(), values.into_iter().map(|v| v.into()).collect()))
.collect::<BTreeMap<_, _>>();
let links: Option<InternedString> = self.links.as_ref().map(|l| l.as_ref().into());
let mut summary = Summary::new(pkgid, deps, &features, links, self.rust_version.clone())?;
summary.set_checksum(self.cksum.clone());
Ok(summary)
}
let features = features
.into_iter()
.map(|(name, values)| (name.into(), values.into_iter().map(|v| v.into()).collect()))
.collect::<BTreeMap<_, _>>();
let links: Option<InternedString> = pkg.links.as_ref().map(|l| l.as_ref().into());
let mut summary = Summary::new(pkgid, deps, &features, links, pkg.rust_version.clone())?;
summary.set_checksum(pkg.cksum.clone());
Ok(summary)
}
#[derive(Deserialize, Serialize)]
@ -303,48 +236,6 @@ struct IndexPackageV {
v: Option<u32>,
}
/// A dependency as encoded in the [`IndexPackage`] index JSON.
#[derive(Deserialize, Serialize, Clone)]
pub struct RegistryDependency<'a> {
/// Name of the dependency. If the dependency is renamed, the original
/// would be stored in [`RegistryDependency::package`].
#[serde(borrow)]
pub name: Cow<'a, str>,
/// The SemVer requirement for this dependency.
#[serde(borrow)]
pub req: Cow<'a, str>,
/// Set of features enabled for this dependency.
#[serde(default)]
pub features: Vec<Cow<'a, str>>,
/// Whether or not this is an optional dependency.
#[serde(default)]
pub optional: bool,
/// Whether or not default features are enabled.
#[serde(default = "default_true")]
pub default_features: bool,
/// The target platform for this dependency.
pub target: Option<Cow<'a, str>>,
/// The dependency kind. "dev", "build", and "normal".
pub kind: Option<Cow<'a, str>>,
// The URL of the index of the registry where this dependency is from.
// `None` if it is from the same index.
pub registry: Option<Cow<'a, str>>,
/// The original name if the dependency is renamed.
pub package: Option<Cow<'a, str>>,
/// Whether or not this is a public dependency. Unstable. See [RFC 1977].
///
/// [RFC 1977]: https://rust-lang.github.io/rfcs/1977-public-private-dependencies.html
pub public: Option<bool>,
pub artifact: Option<Vec<Cow<'a, str>>>,
pub bindep_target: Option<Cow<'a, str>>,
#[serde(default)]
pub lib: bool,
}
fn default_true() -> bool {
true
}
impl<'gctx> RegistryIndex<'gctx> {
/// Creates an empty registry index at `path`.
pub fn new(
@ -753,7 +644,7 @@ impl IndexSummary {
// values carefully when making changes here.
let index_summary = (|| {
let index = serde_json::from_slice::<IndexPackage<'_>>(line)?;
let summary = index.to_summary(source_id)?;
let summary = index_package_to_summary(&index, source_id)?;
Ok((index, summary))
})();
let (index, summary, valid) = match index_summary {
@ -784,7 +675,7 @@ impl IndexSummary {
yanked: Default::default(),
links: Default::default(),
};
let summary = index.to_summary(source_id)?;
let summary = index_package_to_summary(&index, source_id)?;
(index, summary, false)
}
};
@ -809,77 +700,78 @@ impl IndexSummary {
}
}
impl<'a> RegistryDependency<'a> {
/// Converts an encoded dependency in the registry to a cargo dependency
pub fn into_dep(self, default: SourceId) -> CargoResult<Dependency> {
let RegistryDependency {
name,
req,
mut features,
optional,
default_features,
target,
kind,
registry,
package,
public,
artifact,
bindep_target,
lib,
} = self;
/// Converts an encoded dependency in the registry to a cargo dependency
fn registry_dependency_into_dep(
dep: RegistryDependency<'_>,
default: SourceId,
) -> CargoResult<Dependency> {
let RegistryDependency {
name,
req,
mut features,
optional,
default_features,
target,
kind,
registry,
package,
public,
artifact,
bindep_target,
lib,
} = dep;
let id = if let Some(registry) = &registry {
SourceId::for_registry(&registry.into_url()?)?
} else {
default
};
let id = if let Some(registry) = &registry {
SourceId::for_registry(&registry.into_url()?)?
} else {
default
};
let interned_name = InternedString::new(package.as_ref().unwrap_or(&name));
let mut dep = Dependency::parse(interned_name, Some(&req), id)?;
if package.is_some() {
dep.set_explicit_name_in_toml(name);
}
let kind = match kind.as_deref().unwrap_or("") {
"dev" => DepKind::Development,
"build" => DepKind::Build,
_ => DepKind::Normal,
};
let platform = match target {
Some(target) => Some(target.parse()?),
None => None,
};
// All dependencies are private by default
let public = public.unwrap_or(false);
// Unfortunately older versions of cargo and/or the registry ended up
// publishing lots of entries where the features array contained the
// empty feature, "", inside. This confuses the resolution process much
// later on and these features aren't actually valid, so filter them all
// out here.
features.retain(|s| !s.is_empty());
// In index, "registry" is null if it is from the same index.
// In Cargo.toml, "registry" is None if it is from the default
if !id.is_crates_io() {
dep.set_registry_id(id);
}
if let Some(artifacts) = artifact {
let artifact = Artifact::parse(&artifacts, lib, bindep_target.as_deref())?;
dep.set_artifact(artifact);
}
dep.set_optional(optional)
.set_default_features(default_features)
.set_features(features)
.set_platform(platform)
.set_kind(kind)
.set_public(public);
Ok(dep)
let interned_name = InternedString::new(package.as_ref().unwrap_or(&name));
let mut dep = Dependency::parse(interned_name, Some(&req), id)?;
if package.is_some() {
dep.set_explicit_name_in_toml(name);
}
let kind = match kind.as_deref().unwrap_or("") {
"dev" => DepKind::Development,
"build" => DepKind::Build,
_ => DepKind::Normal,
};
let platform = match target {
Some(target) => Some(target.parse()?),
None => None,
};
// All dependencies are private by default
let public = public.unwrap_or(false);
// Unfortunately older versions of cargo and/or the registry ended up
// publishing lots of entries where the features array contained the
// empty feature, "", inside. This confuses the resolution process much
// later on and these features aren't actually valid, so filter them all
// out here.
features.retain(|s| !s.is_empty());
// In index, "registry" is null if it is from the same index.
// In Cargo.toml, "registry" is None if it is from the default
if !id.is_crates_io() {
dep.set_registry_id(id);
}
if let Some(artifacts) = artifact {
let artifact = Artifact::parse(&artifacts, lib, bindep_target.as_deref())?;
dep.set_artifact(artifact);
}
dep.set_optional(optional)
.set_default_features(default_features)
.set_features(features)
.set_platform(platform)
.set_kind(kind)
.set_public(public);
Ok(dep)
}
/// Like [`slice::split`] but is optimized by [`memchr`].
@ -907,36 +799,3 @@ fn split(haystack: &[u8], needle: u8) -> impl Iterator<Item = &[u8]> {
Split { haystack, needle }
}
#[test]
fn escaped_char_in_index_json_blob() {
let _: IndexPackage<'_> = serde_json::from_str(
r#"{"name":"a","vers":"0.0.1","deps":[],"cksum":"bae3","features":{}}"#,
)
.unwrap();
let _: IndexPackage<'_> = serde_json::from_str(
r#"{"name":"a","vers":"0.0.1","deps":[],"cksum":"bae3","features":{"test":["k","q"]},"links":"a-sys"}"#
).unwrap();
// Now we add escaped cher all the places they can go
// these are not valid, but it should error later than json parsing
let _: IndexPackage<'_> = serde_json::from_str(
r#"{
"name":"This name has a escaped cher in it \n\t\" ",
"vers":"0.0.1",
"deps":[{
"name": " \n\t\" ",
"req": " \n\t\" ",
"features": [" \n\t\" "],
"optional": true,
"default_features": true,
"target": " \n\t\" ",
"kind": " \n\t\" ",
"registry": " \n\t\" "
}],
"cksum":"bae3",
"features":{"test \n\t\" ":["k \n\t\" ","q \n\t\" "]},
"links":" \n\t\" "}"#,
)
.unwrap();
}

View File

@ -73,7 +73,7 @@
//! about the format of the registry:
//!
//! 1. Each crate will have one file corresponding to it. Each version for a
//! crate will just be a line in this file (see [`IndexPackage`] for its
//! crate will just be a line in this file (see [`cargo_util_schemas::index::IndexPackage`] for its
//! representation).
//! 2. There will be two tiers of directories for crate names, under which
//! crates corresponding to those tiers will be located.
@ -125,7 +125,7 @@
//!
//! Each file in the index is the history of one crate over time. Each line in
//! the file corresponds to one version of a crate, stored in JSON format (see
//! the [`IndexPackage`] structure).
//! the [`cargo_util_schemas::index::IndexPackage`] structure).
//!
//! As new versions are published, new lines are appended to this file. **The
//! only modifications to this file that should happen over time are yanks of a
@ -181,7 +181,6 @@
//! ...
//! ```
//!
//! [`IndexPackage`]: index::IndexPackage
use std::collections::HashSet;
use std::fs;