From 3184f03ef3026f6687e06ec32dd9308775d68e3a Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 25 Aug 2025 16:30:26 -0500 Subject: [PATCH 1/6] refactor(frontmatter): Switch to winnow --- Cargo.lock | 1 + Cargo.toml | 2 ++ src/cargo/util/frontmatter.rs | 44 ++++++++++++++++++++--------------- 3 files changed, 28 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c401a80c8..9adf1b1f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -399,6 +399,7 @@ dependencies = [ "url", "walkdir", "windows-sys 0.60.2", + "winnow", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 4684ef0ec..c2e4976cb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -118,6 +118,7 @@ url = "2.5.4" varisat = "0.2.2" walkdir = "2.5.0" windows-sys = "0.60" +winnow = "0.7.13" [workspace.lints.rust] rust_2018_idioms = "warn" # TODO: could this be removed? @@ -220,6 +221,7 @@ unicode-width.workspace = true unicode-xid.workspace = true url.workspace = true walkdir.workspace = true +winnow.workspace = true [target.'cfg(target_has_atomic = "64")'.dependencies] tracing-chrome.workspace = true diff --git a/src/cargo/util/frontmatter.rs b/src/cargo/util/frontmatter.rs index 022973035..3abb4f6d1 100644 --- a/src/cargo/util/frontmatter.rs +++ b/src/cargo/util/frontmatter.rs @@ -10,6 +10,9 @@ pub struct ScriptSource<'s> { impl<'s> ScriptSource<'s> { pub fn parse(input: &'s str) -> CargoResult { + use winnow::stream::FindSlice as _; + use winnow::stream::Stream as _; + let mut source = Self { shebang: None, info: None, @@ -17,25 +20,25 @@ impl<'s> ScriptSource<'s> { content: input, }; - if let Some(shebang_end) = strip_shebang(source.content) { - let (shebang, content) = source.content.split_at(shebang_end); - source.shebang = Some(shebang); - source.content = content; + let mut input = winnow::stream::LocatingSlice::new(input); + + if let Some(shebang_end) = strip_shebang(input.as_ref()) { + source.shebang = Some(input.next_slice(shebang_end)); + source.content = input.as_ref(); } - let mut rest = source.content; - // Whitespace may precede a frontmatter but must end with a newline - if let Some(nl_end) = strip_ws_lines(rest) { - rest = &rest[nl_end..]; + if let Some(nl_end) = strip_ws_lines(input.as_ref()) { + let _ = input.next_slice(nl_end); } // Opens with a line that starts with 3 or more `-` followed by an optional identifier const FENCE_CHAR: char = '-'; - let fence_length = rest + let fence_length = input + .as_ref() .char_indices() .find_map(|(i, c)| (c != FENCE_CHAR).then_some(i)) - .unwrap_or(rest.len()); + .unwrap_or_else(|| input.eof_offset()); match fence_length { 0 => { return Ok(source); @@ -48,11 +51,11 @@ impl<'s> ScriptSource<'s> { } _ => {} } - let (fence_pattern, rest) = rest.split_at(fence_length); - let Some(info_end_index) = rest.find('\n') else { + let fence_pattern = input.next_slice(fence_length); + let Some(info_nl) = input.find_slice("\n") else { anyhow::bail!("no closing `{fence_pattern}` found for frontmatter"); }; - let (info, rest) = rest.split_at(info_end_index); + let info = input.next_slice(info_nl.start); let info = info.trim_matches(is_whitespace); if !info.is_empty() { source.info = Some(info); @@ -60,25 +63,28 @@ impl<'s> ScriptSource<'s> { // Ends with a line that starts with a matching number of `-` only followed by whitespace let nl_fence_pattern = format!("\n{fence_pattern}"); - let Some(frontmatter_nl) = rest.find(&nl_fence_pattern) else { + let Some(frontmatter_nl) = input.find_slice(nl_fence_pattern.as_str()) else { anyhow::bail!("no closing `{fence_pattern}` found for frontmatter"); }; - let frontmatter = &rest[..frontmatter_nl + 1]; + let frontmatter = input.next_slice(frontmatter_nl.start + 1); let frontmatter = frontmatter .strip_prefix('\n') .expect("earlier `found` + `split_at` left us here"); source.frontmatter = Some(frontmatter); - let rest = &rest[frontmatter_nl + nl_fence_pattern.len()..]; + let _ = input.next_slice(fence_length); - let (after_closing_fence, rest) = rest.split_once("\n").unwrap_or((rest, "")); + let nl = input.find_slice("\n"); + let after_closing_fence = input.next_slice( + nl.map(|span| span.end) + .unwrap_or_else(|| input.eof_offset()), + ); let after_closing_fence = after_closing_fence.trim_matches(is_whitespace); if !after_closing_fence.is_empty() { // extra characters beyond the original fence pattern, even if they are extra `-` anyhow::bail!("trailing characters found after frontmatter close"); } - let frontmatter_len = input.len() - rest.len(); - source.content = &input[frontmatter_len..]; + source.content = input.finish(); let repeat = Self::parse(source.content)?; if repeat.frontmatter.is_some() { From e9c744b8de50b0ee30ec6728f8917dadc060fae4 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 25 Aug 2025 16:28:39 -0500 Subject: [PATCH 2/6] refactor(frontmatter): Track spans This will make error reporting easier as well as editing --- src/cargo/util/frontmatter.rs | 54 +++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/src/cargo/util/frontmatter.rs b/src/cargo/util/frontmatter.rs index 3abb4f6d1..2af9d67ab 100644 --- a/src/cargo/util/frontmatter.rs +++ b/src/cargo/util/frontmatter.rs @@ -1,30 +1,40 @@ use crate::CargoResult; +type Span = std::ops::Range; + #[derive(Debug)] pub struct ScriptSource<'s> { - shebang: Option<&'s str>, - info: Option<&'s str>, - frontmatter: Option<&'s str>, - content: &'s str, + raw: &'s str, + shebang: Option, + info: Option, + frontmatter: Option, + content: Span, } impl<'s> ScriptSource<'s> { - pub fn parse(input: &'s str) -> CargoResult { + pub fn parse(raw: &'s str) -> CargoResult { use winnow::stream::FindSlice as _; + use winnow::stream::Location as _; + use winnow::stream::Offset as _; use winnow::stream::Stream as _; + let content_end = raw.len(); let mut source = Self { + raw, shebang: None, info: None, frontmatter: None, - content: input, + content: 0..content_end, }; - let mut input = winnow::stream::LocatingSlice::new(input); + let mut input = winnow::stream::LocatingSlice::new(raw); if let Some(shebang_end) = strip_shebang(input.as_ref()) { - source.shebang = Some(input.next_slice(shebang_end)); - source.content = input.as_ref(); + let shebang_start = input.current_token_start(); + let _ = input.next_slice(shebang_end); + let shebang_end = input.current_token_start(); + source.shebang = Some(shebang_start..shebang_end); + source.content = shebang_end..content_end; } // Whitespace may precede a frontmatter but must end with a newline @@ -58,7 +68,9 @@ impl<'s> ScriptSource<'s> { let info = input.next_slice(info_nl.start); let info = info.trim_matches(is_whitespace); if !info.is_empty() { - source.info = Some(info); + let info_start = info.offset_from(&raw); + let info_end = info_start + info.len(); + source.info = Some(info_start..info_end); } // Ends with a line that starts with a matching number of `-` only followed by whitespace @@ -66,11 +78,10 @@ impl<'s> ScriptSource<'s> { let Some(frontmatter_nl) = input.find_slice(nl_fence_pattern.as_str()) else { anyhow::bail!("no closing `{fence_pattern}` found for frontmatter"); }; - let frontmatter = input.next_slice(frontmatter_nl.start + 1); - let frontmatter = frontmatter - .strip_prefix('\n') - .expect("earlier `found` + `split_at` left us here"); - source.frontmatter = Some(frontmatter); + let frontmatter_start = input.current_token_start() + 1; // skip nl from infostring + let _ = input.next_slice(frontmatter_nl.start + 1); + let frontmatter_end = input.current_token_start(); + source.frontmatter = Some(frontmatter_start..frontmatter_end); let _ = input.next_slice(fence_length); let nl = input.find_slice("\n"); @@ -78,15 +89,16 @@ impl<'s> ScriptSource<'s> { nl.map(|span| span.end) .unwrap_or_else(|| input.eof_offset()), ); + let content_start = input.current_token_start(); let after_closing_fence = after_closing_fence.trim_matches(is_whitespace); if !after_closing_fence.is_empty() { // extra characters beyond the original fence pattern, even if they are extra `-` anyhow::bail!("trailing characters found after frontmatter close"); } - source.content = input.finish(); + source.content = content_start..content_end; - let repeat = Self::parse(source.content)?; + let repeat = Self::parse(source.content())?; if repeat.frontmatter.is_some() { anyhow::bail!("only one frontmatter is supported"); } @@ -95,19 +107,19 @@ impl<'s> ScriptSource<'s> { } pub fn shebang(&self) -> Option<&'s str> { - self.shebang + self.shebang.clone().map(|span| &self.raw[span]) } pub fn info(&self) -> Option<&'s str> { - self.info + self.info.clone().map(|span| &self.raw[span]) } pub fn frontmatter(&self) -> Option<&'s str> { - self.frontmatter + self.frontmatter.clone().map(|span| &self.raw[span]) } pub fn content(&self) -> &'s str { - self.content + &self.raw[self.content.clone()] } } From 67dc504f946248169290d0662193b6db074a497c Mon Sep 17 00:00:00 2001 From: Ed Page Date: Tue, 26 Aug 2025 10:28:09 -0500 Subject: [PATCH 3/6] refactor(manifest): Simplify frontmatter editing --- src/cargo/util/frontmatter.rs | 16 +++++++++++++ src/cargo/util/toml_mut/manifest.rs | 36 ++++++++--------------------- 2 files changed, 25 insertions(+), 27 deletions(-) diff --git a/src/cargo/util/frontmatter.rs b/src/cargo/util/frontmatter.rs index 2af9d67ab..9c9a9a7ff 100644 --- a/src/cargo/util/frontmatter.rs +++ b/src/cargo/util/frontmatter.rs @@ -110,17 +110,33 @@ impl<'s> ScriptSource<'s> { self.shebang.clone().map(|span| &self.raw[span]) } + pub fn shebang_span(&self) -> Option { + self.shebang.clone() + } + pub fn info(&self) -> Option<&'s str> { self.info.clone().map(|span| &self.raw[span]) } + pub fn info_span(&self) -> Option { + self.info.clone() + } + pub fn frontmatter(&self) -> Option<&'s str> { self.frontmatter.clone().map(|span| &self.raw[span]) } + pub fn frontmatter_span(&self) -> Option { + self.frontmatter.clone() + } + pub fn content(&self) -> &'s str { &self.raw[self.content.clone()] } + + pub fn content_span(&self) -> Span { + self.content.clone() + } } /// Returns the index after the shebang line, if present diff --git a/src/cargo/util/toml_mut/manifest.rs b/src/cargo/util/toml_mut/manifest.rs index 13e1776e7..de810f238 100644 --- a/src/cargo/util/toml_mut/manifest.rs +++ b/src/cargo/util/toml_mut/manifest.rs @@ -277,11 +277,11 @@ impl LocalManifest { let mut embedded = None; if is_embedded(path) { let source = ScriptSource::parse(&data)?; - if let Some(frontmatter) = source.frontmatter() { - embedded = Some(Embedded::exists(&data, frontmatter)); - data = frontmatter.to_owned(); - } else if let Some(shebang) = source.shebang() { - embedded = Some(Embedded::after(&data, shebang)); + if let Some(frontmatter) = source.frontmatter_span() { + embedded = Some(Embedded::exists(frontmatter)); + data = source.frontmatter().unwrap().to_owned(); + } else if let Some(shebang) = source.shebang_span() { + embedded = Some(Embedded::after(shebang)); data = String::new(); } else { embedded = Some(Embedded::start()); @@ -592,33 +592,15 @@ impl Embedded { Self::Implicit(0) } - fn after(input: &str, after: &str) -> Self { - let span = substr_span(input, after); - let end = span.end; - Self::Implicit(end) + fn after(after: std::ops::Range) -> Self { + Self::Implicit(after.end) } - fn exists(input: &str, exists: &str) -> Self { - let span = substr_span(input, exists); - Self::Explicit(span) + fn exists(exists: std::ops::Range) -> Self { + Self::Explicit(exists) } } -fn substr_span(haystack: &str, needle: &str) -> std::ops::Range { - let haystack_start_ptr = haystack.as_ptr(); - let haystack_end_ptr = haystack[haystack.len()..haystack.len()].as_ptr(); - - let needle_start_ptr = needle.as_ptr(); - let needle_end_ptr = needle[needle.len()..needle.len()].as_ptr(); - - assert!(needle_end_ptr < haystack_end_ptr); - assert!(haystack_start_ptr <= needle_start_ptr); - let start = needle_start_ptr as usize - haystack_start_ptr as usize; - let end = start + needle.len(); - - start..end -} - #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] enum DependencyStatus { None, From 578ab7cc452781958b36cf13c52c2465f5b650ec Mon Sep 17 00:00:00 2001 From: Ed Page Date: Thu, 4 Sep 2025 14:52:58 -0500 Subject: [PATCH 4/6] test(script): Show line numbers are off --- tests/testsuite/script/cargo.rs | 36 +++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/testsuite/script/cargo.rs b/tests/testsuite/script/cargo.rs index a7f23cfc2..f48415400 100644 --- a/tests/testsuite/script/cargo.rs +++ b/tests/testsuite/script/cargo.rs @@ -182,6 +182,42 @@ fn requires_z_flag() { .run(); } +#[cargo_test(nightly, reason = "-Zscript is unstable")] +fn manifest_parse_error() { + // Exagerate the newlines to make it more obvious if the error's line number is off + let script = r#"#!/usr/bin/env cargo + + + + + +--- +[dependencies] +bar = 3 +--- + +fn main() { + println!("Hello world!"); +}"#; + let p = cargo_test_support::project() + .file("script.rs", script) + .build(); + + p.cargo("-Zscript -v script.rs") + .masquerade_as_nightly_cargo(&["script"]) + .with_status(101) + .with_stdout_data(str![""]) + .with_stderr_data(str![[r#" +[ERROR] invalid type: integer `3`, expected a version string like "0.9.8" or a detailed dependency like { version = "0.9.8" } + --> script.rs:2:7 + | +2 | bar = 3 + | ^ + +"#]]) + .run(); +} + #[cargo_test(nightly, reason = "-Zscript is unstable")] fn clean_output_with_edition() { let script = r#"#!/usr/bin/env cargo From 4909d789730f9352b19f5514162e7015ebdbf3e6 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Thu, 4 Sep 2025 16:15:04 -0500 Subject: [PATCH 5/6] fix(frontmatter): Report script manifest errors for the right line number --- src/cargo/util/frontmatter.rs | 18 ++++++++++++++ src/cargo/util/toml/embedded.rs | 24 ++++++++++++++----- tests/testsuite/script/cargo.rs | 4 ++-- .../frontmatter-contains-whitespace.stderr | 8 +++---- 4 files changed, 42 insertions(+), 12 deletions(-) diff --git a/src/cargo/util/frontmatter.rs b/src/cargo/util/frontmatter.rs index 9c9a9a7ff..7fbe7a5c3 100644 --- a/src/cargo/util/frontmatter.rs +++ b/src/cargo/util/frontmatter.rs @@ -6,8 +6,10 @@ type Span = std::ops::Range; pub struct ScriptSource<'s> { raw: &'s str, shebang: Option, + open: Option, info: Option, frontmatter: Option, + close: Option, content: Span, } @@ -22,8 +24,10 @@ impl<'s> ScriptSource<'s> { let mut source = Self { raw, shebang: None, + open: None, info: None, frontmatter: None, + close: None, content: 0..content_end, }; @@ -61,7 +65,10 @@ impl<'s> ScriptSource<'s> { } _ => {} } + let open_start = input.current_token_start(); let fence_pattern = input.next_slice(fence_length); + let open_end = input.current_token_start(); + source.open = Some(open_start..open_end); let Some(info_nl) = input.find_slice("\n") else { anyhow::bail!("no closing `{fence_pattern}` found for frontmatter"); }; @@ -82,7 +89,10 @@ impl<'s> ScriptSource<'s> { let _ = input.next_slice(frontmatter_nl.start + 1); let frontmatter_end = input.current_token_start(); source.frontmatter = Some(frontmatter_start..frontmatter_end); + let close_start = input.current_token_start(); let _ = input.next_slice(fence_length); + let close_end = input.current_token_start(); + source.close = Some(close_start..close_end); let nl = input.find_slice("\n"); let after_closing_fence = input.next_slice( @@ -114,6 +124,10 @@ impl<'s> ScriptSource<'s> { self.shebang.clone() } + pub fn open_span(&self) -> Option { + self.open.clone() + } + pub fn info(&self) -> Option<&'s str> { self.info.clone().map(|span| &self.raw[span]) } @@ -130,6 +144,10 @@ impl<'s> ScriptSource<'s> { self.frontmatter.clone() } + pub fn close_span(&self) -> Option { + self.close.clone() + } + pub fn content(&self) -> &'s str { &self.raw[self.content.clone()] } diff --git a/src/cargo/util/toml/embedded.rs b/src/cargo/util/toml/embedded.rs index bf8979524..61b698229 100644 --- a/src/cargo/util/toml/embedded.rs +++ b/src/cargo/util/toml/embedded.rs @@ -6,7 +6,7 @@ use crate::util::restricted_names; pub(super) fn expand_manifest(content: &str) -> CargoResult { let source = ScriptSource::parse(content)?; - if let Some(frontmatter) = source.frontmatter() { + if let Some(span) = source.frontmatter_span() { match source.info() { Some("cargo") | None => {} Some(other) => { @@ -22,10 +22,21 @@ pub(super) fn expand_manifest(content: &str) -> CargoResult { } } - Ok(frontmatter.to_owned()) + // Include from file start to frontmatter end when we parse the TOML to get line numbers + // correct and so if a TOML error says "entire file", it shows the existing content, rather + // than blank lines. + // + // HACK: Since frontmatter open isn't valid TOML, we insert a comment + let mut frontmatter = content[0..span.end].to_owned(); + let open_span = source.open_span().unwrap(); + frontmatter.insert(open_span.start, '#'); + Ok(frontmatter) } else { - let frontmatter = ""; - Ok(frontmatter.to_owned()) + // Consider the shebang to be part of the frontmatter + // so if a TOML error says "entire file", it shows the existing content, rather + // than blank lines. + let span = source.shebang_span().unwrap_or(0..0); + Ok(content[span].to_owned()) } } @@ -88,11 +99,12 @@ time="0.1.25" fn main() {} "# ), - str![[r#" + str![[r##" +#---cargo [dependencies] time="0.1.25" -"#]] +"##]] ); } } diff --git a/tests/testsuite/script/cargo.rs b/tests/testsuite/script/cargo.rs index f48415400..41d548a07 100644 --- a/tests/testsuite/script/cargo.rs +++ b/tests/testsuite/script/cargo.rs @@ -209,9 +209,9 @@ fn main() { .with_stdout_data(str![""]) .with_stderr_data(str![[r#" [ERROR] invalid type: integer `3`, expected a version string like "0.9.8" or a detailed dependency like { version = "0.9.8" } - --> script.rs:2:7 + --> script.rs:9:7 | -2 | bar = 3 +9 | bar = 3 | ^ "#]]) diff --git a/tests/testsuite/script/rustc_fixtures/frontmatter-contains-whitespace.stderr b/tests/testsuite/script/rustc_fixtures/frontmatter-contains-whitespace.stderr index fc85ccead..391a5e9f8 100644 --- a/tests/testsuite/script/rustc_fixtures/frontmatter-contains-whitespace.stderr +++ b/tests/testsuite/script/rustc_fixtures/frontmatter-contains-whitespace.stderr @@ -1,5 +1,5 @@ [ERROR] invalid multi-line basic string, expected `/`, characters - --> script:8:2 - | -8 | 4␌+ - | ^ + --> script:10:2 + | +10 | 4␌+ + | ^ From 0603e4e3b58670e4f7677bc9b12f30600a8cec3a Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 8 Sep 2025 14:26:28 -0500 Subject: [PATCH 6/6] docs(frontmatter): Clarify each ScriptSource field's intent --- src/cargo/util/frontmatter.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/cargo/util/frontmatter.rs b/src/cargo/util/frontmatter.rs index 7fbe7a5c3..75429b99e 100644 --- a/src/cargo/util/frontmatter.rs +++ b/src/cargo/util/frontmatter.rs @@ -4,12 +4,20 @@ type Span = std::ops::Range; #[derive(Debug)] pub struct ScriptSource<'s> { + /// The full file raw: &'s str, + /// The `#!/usr/bin/env cargo` line, if present shebang: Option, + /// The code fence opener (`---`) open: Option, + /// Trailing text after `ScriptSource::open` that identifies the meaning of + /// `ScriptSource::frontmatter` info: Option, + /// The lines between `ScriptSource::open` and `ScriptSource::close` frontmatter: Option, + /// The code fence closer (`---`) close: Option, + /// All content after the frontmatter and shebang content: Span, }