fix(manifest): Report script manifest errors for the right line number (#15927)

### What does this PR try to resolve?

Currently, if you have a manifest parse error for a cargo script, the
line number will be relative to the start of the frontmatter and not the
source file. This changes it so we get the line number relative to the
start of the source file.

### How to test and review this PR?

### Notes

To do this, I added span tracking to our frontmatter parser. To make
this easier, I used some helpers from winnow which is an existing
transitive dependency.

This span tracking will also come into use when I change the frontmatter
parser to start reporting spans in errors so we get annotated snippets
of source in the error messages to users.
This commit is contained in:
Ed Page 2025-09-08 19:59:42 +00:00 committed by GitHub
commit 0e07ceb3f3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 165 additions and 72 deletions

1
Cargo.lock generated
View File

@ -399,6 +399,7 @@ dependencies = [
"url", "url",
"walkdir", "walkdir",
"windows-sys 0.60.2", "windows-sys 0.60.2",
"winnow",
] ]
[[package]] [[package]]

View File

@ -118,6 +118,7 @@ url = "2.5.4"
varisat = "0.2.2" varisat = "0.2.2"
walkdir = "2.5.0" walkdir = "2.5.0"
windows-sys = "0.60" windows-sys = "0.60"
winnow = "0.7.13"
[workspace.lints.rust] [workspace.lints.rust]
rust_2018_idioms = "warn" # TODO: could this be removed? rust_2018_idioms = "warn" # TODO: could this be removed?
@ -220,6 +221,7 @@ unicode-width.workspace = true
unicode-xid.workspace = true unicode-xid.workspace = true
url.workspace = true url.workspace = true
walkdir.workspace = true walkdir.workspace = true
winnow.workspace = true
[target.'cfg(target_has_atomic = "64")'.dependencies] [target.'cfg(target_has_atomic = "64")'.dependencies]
tracing-chrome.workspace = true tracing-chrome.workspace = true

View File

@ -1,41 +1,66 @@
use crate::CargoResult; use crate::CargoResult;
type Span = std::ops::Range<usize>;
#[derive(Debug)] #[derive(Debug)]
pub struct ScriptSource<'s> { pub struct ScriptSource<'s> {
shebang: Option<&'s str>, /// The full file
info: Option<&'s str>, raw: &'s str,
frontmatter: Option<&'s str>, /// The `#!/usr/bin/env cargo` line, if present
content: &'s str, shebang: Option<Span>,
/// The code fence opener (`---`)
open: Option<Span>,
/// Trailing text after `ScriptSource::open` that identifies the meaning of
/// `ScriptSource::frontmatter`
info: Option<Span>,
/// The lines between `ScriptSource::open` and `ScriptSource::close`
frontmatter: Option<Span>,
/// The code fence closer (`---`)
close: Option<Span>,
/// All content after the frontmatter and shebang
content: Span,
} }
impl<'s> ScriptSource<'s> { impl<'s> ScriptSource<'s> {
pub fn parse(input: &'s str) -> CargoResult<Self> { pub fn parse(raw: &'s str) -> CargoResult<Self> {
use winnow::stream::FindSlice as _;
use winnow::stream::Location as _;
use winnow::stream::Offset as _;
use winnow::stream::Stream as _;
let content_end = raw.len();
let mut source = Self { let mut source = Self {
raw,
shebang: None, shebang: None,
open: None,
info: None, info: None,
frontmatter: None, frontmatter: None,
content: input, close: None,
content: 0..content_end,
}; };
if let Some(shebang_end) = strip_shebang(source.content) { let mut input = winnow::stream::LocatingSlice::new(raw);
let (shebang, content) = source.content.split_at(shebang_end);
source.shebang = Some(shebang); if let Some(shebang_end) = strip_shebang(input.as_ref()) {
source.content = content; let shebang_start = input.current_token_start();
let _ = input.next_slice(shebang_end);
let shebang_end = input.current_token_start();
source.shebang = Some(shebang_start..shebang_end);
source.content = shebang_end..content_end;
} }
let mut rest = source.content;
// Whitespace may precede a frontmatter but must end with a newline // Whitespace may precede a frontmatter but must end with a newline
if let Some(nl_end) = strip_ws_lines(rest) { if let Some(nl_end) = strip_ws_lines(input.as_ref()) {
rest = &rest[nl_end..]; let _ = input.next_slice(nl_end);
} }
// Opens with a line that starts with 3 or more `-` followed by an optional identifier // Opens with a line that starts with 3 or more `-` followed by an optional identifier
const FENCE_CHAR: char = '-'; const FENCE_CHAR: char = '-';
let fence_length = rest let fence_length = input
.as_ref()
.char_indices() .char_indices()
.find_map(|(i, c)| (c != FENCE_CHAR).then_some(i)) .find_map(|(i, c)| (c != FENCE_CHAR).then_some(i))
.unwrap_or(rest.len()); .unwrap_or_else(|| input.eof_offset());
match fence_length { match fence_length {
0 => { 0 => {
return Ok(source); return Ok(source);
@ -48,39 +73,50 @@ impl<'s> ScriptSource<'s> {
} }
_ => {} _ => {}
} }
let (fence_pattern, rest) = rest.split_at(fence_length); let open_start = input.current_token_start();
let Some(info_end_index) = rest.find('\n') else { let fence_pattern = input.next_slice(fence_length);
let open_end = input.current_token_start();
source.open = Some(open_start..open_end);
let Some(info_nl) = input.find_slice("\n") else {
anyhow::bail!("no closing `{fence_pattern}` found for frontmatter"); anyhow::bail!("no closing `{fence_pattern}` found for frontmatter");
}; };
let (info, rest) = rest.split_at(info_end_index); let info = input.next_slice(info_nl.start);
let info = info.trim_matches(is_whitespace); let info = info.trim_matches(is_whitespace);
if !info.is_empty() { if !info.is_empty() {
source.info = Some(info); let info_start = info.offset_from(&raw);
let info_end = info_start + info.len();
source.info = Some(info_start..info_end);
} }
// Ends with a line that starts with a matching number of `-` only followed by whitespace // Ends with a line that starts with a matching number of `-` only followed by whitespace
let nl_fence_pattern = format!("\n{fence_pattern}"); let nl_fence_pattern = format!("\n{fence_pattern}");
let Some(frontmatter_nl) = rest.find(&nl_fence_pattern) else { let Some(frontmatter_nl) = input.find_slice(nl_fence_pattern.as_str()) else {
anyhow::bail!("no closing `{fence_pattern}` found for frontmatter"); anyhow::bail!("no closing `{fence_pattern}` found for frontmatter");
}; };
let frontmatter = &rest[..frontmatter_nl + 1]; let frontmatter_start = input.current_token_start() + 1; // skip nl from infostring
let frontmatter = frontmatter let _ = input.next_slice(frontmatter_nl.start + 1);
.strip_prefix('\n') let frontmatter_end = input.current_token_start();
.expect("earlier `found` + `split_at` left us here"); source.frontmatter = Some(frontmatter_start..frontmatter_end);
source.frontmatter = Some(frontmatter); let close_start = input.current_token_start();
let rest = &rest[frontmatter_nl + nl_fence_pattern.len()..]; let _ = input.next_slice(fence_length);
let close_end = input.current_token_start();
source.close = Some(close_start..close_end);
let (after_closing_fence, rest) = rest.split_once("\n").unwrap_or((rest, "")); let nl = input.find_slice("\n");
let after_closing_fence = input.next_slice(
nl.map(|span| span.end)
.unwrap_or_else(|| input.eof_offset()),
);
let content_start = input.current_token_start();
let after_closing_fence = after_closing_fence.trim_matches(is_whitespace); let after_closing_fence = after_closing_fence.trim_matches(is_whitespace);
if !after_closing_fence.is_empty() { if !after_closing_fence.is_empty() {
// extra characters beyond the original fence pattern, even if they are extra `-` // extra characters beyond the original fence pattern, even if they are extra `-`
anyhow::bail!("trailing characters found after frontmatter close"); anyhow::bail!("trailing characters found after frontmatter close");
} }
let frontmatter_len = input.len() - rest.len(); source.content = content_start..content_end;
source.content = &input[frontmatter_len..];
let repeat = Self::parse(source.content)?; let repeat = Self::parse(source.content())?;
if repeat.frontmatter.is_some() { if repeat.frontmatter.is_some() {
anyhow::bail!("only one frontmatter is supported"); anyhow::bail!("only one frontmatter is supported");
} }
@ -89,19 +125,43 @@ impl<'s> ScriptSource<'s> {
} }
pub fn shebang(&self) -> Option<&'s str> { pub fn shebang(&self) -> Option<&'s str> {
self.shebang self.shebang.clone().map(|span| &self.raw[span])
}
pub fn shebang_span(&self) -> Option<Span> {
self.shebang.clone()
}
pub fn open_span(&self) -> Option<Span> {
self.open.clone()
} }
pub fn info(&self) -> Option<&'s str> { pub fn info(&self) -> Option<&'s str> {
self.info self.info.clone().map(|span| &self.raw[span])
}
pub fn info_span(&self) -> Option<Span> {
self.info.clone()
} }
pub fn frontmatter(&self) -> Option<&'s str> { pub fn frontmatter(&self) -> Option<&'s str> {
self.frontmatter self.frontmatter.clone().map(|span| &self.raw[span])
}
pub fn frontmatter_span(&self) -> Option<Span> {
self.frontmatter.clone()
}
pub fn close_span(&self) -> Option<Span> {
self.close.clone()
} }
pub fn content(&self) -> &'s str { pub fn content(&self) -> &'s str {
self.content &self.raw[self.content.clone()]
}
pub fn content_span(&self) -> Span {
self.content.clone()
} }
} }

View File

@ -6,7 +6,7 @@ use crate::util::restricted_names;
pub(super) fn expand_manifest(content: &str) -> CargoResult<String> { pub(super) fn expand_manifest(content: &str) -> CargoResult<String> {
let source = ScriptSource::parse(content)?; let source = ScriptSource::parse(content)?;
if let Some(frontmatter) = source.frontmatter() { if let Some(span) = source.frontmatter_span() {
match source.info() { match source.info() {
Some("cargo") | None => {} Some("cargo") | None => {}
Some(other) => { Some(other) => {
@ -22,10 +22,21 @@ pub(super) fn expand_manifest(content: &str) -> CargoResult<String> {
} }
} }
Ok(frontmatter.to_owned()) // Include from file start to frontmatter end when we parse the TOML to get line numbers
// correct and so if a TOML error says "entire file", it shows the existing content, rather
// than blank lines.
//
// HACK: Since frontmatter open isn't valid TOML, we insert a comment
let mut frontmatter = content[0..span.end].to_owned();
let open_span = source.open_span().unwrap();
frontmatter.insert(open_span.start, '#');
Ok(frontmatter)
} else { } else {
let frontmatter = ""; // Consider the shebang to be part of the frontmatter
Ok(frontmatter.to_owned()) // so if a TOML error says "entire file", it shows the existing content, rather
// than blank lines.
let span = source.shebang_span().unwrap_or(0..0);
Ok(content[span].to_owned())
} }
} }
@ -88,11 +99,12 @@ time="0.1.25"
fn main() {} fn main() {}
"# "#
), ),
str![[r#" str![[r##"
#---cargo
[dependencies] [dependencies]
time="0.1.25" time="0.1.25"
"#]] "##]]
); );
} }
} }

View File

@ -277,11 +277,11 @@ impl LocalManifest {
let mut embedded = None; let mut embedded = None;
if is_embedded(path) { if is_embedded(path) {
let source = ScriptSource::parse(&data)?; let source = ScriptSource::parse(&data)?;
if let Some(frontmatter) = source.frontmatter() { if let Some(frontmatter) = source.frontmatter_span() {
embedded = Some(Embedded::exists(&data, frontmatter)); embedded = Some(Embedded::exists(frontmatter));
data = frontmatter.to_owned(); data = source.frontmatter().unwrap().to_owned();
} else if let Some(shebang) = source.shebang() { } else if let Some(shebang) = source.shebang_span() {
embedded = Some(Embedded::after(&data, shebang)); embedded = Some(Embedded::after(shebang));
data = String::new(); data = String::new();
} else { } else {
embedded = Some(Embedded::start()); embedded = Some(Embedded::start());
@ -592,33 +592,15 @@ impl Embedded {
Self::Implicit(0) Self::Implicit(0)
} }
fn after(input: &str, after: &str) -> Self { fn after(after: std::ops::Range<usize>) -> Self {
let span = substr_span(input, after); Self::Implicit(after.end)
let end = span.end;
Self::Implicit(end)
} }
fn exists(input: &str, exists: &str) -> Self { fn exists(exists: std::ops::Range<usize>) -> Self {
let span = substr_span(input, exists); Self::Explicit(exists)
Self::Explicit(span)
} }
} }
fn substr_span(haystack: &str, needle: &str) -> std::ops::Range<usize> {
let haystack_start_ptr = haystack.as_ptr();
let haystack_end_ptr = haystack[haystack.len()..haystack.len()].as_ptr();
let needle_start_ptr = needle.as_ptr();
let needle_end_ptr = needle[needle.len()..needle.len()].as_ptr();
assert!(needle_end_ptr < haystack_end_ptr);
assert!(haystack_start_ptr <= needle_start_ptr);
let start = needle_start_ptr as usize - haystack_start_ptr as usize;
let end = start + needle.len();
start..end
}
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
enum DependencyStatus { enum DependencyStatus {
None, None,

View File

@ -182,6 +182,42 @@ fn requires_z_flag() {
.run(); .run();
} }
#[cargo_test(nightly, reason = "-Zscript is unstable")]
fn manifest_parse_error() {
// Exagerate the newlines to make it more obvious if the error's line number is off
let script = r#"#!/usr/bin/env cargo
---
[dependencies]
bar = 3
---
fn main() {
println!("Hello world!");
}"#;
let p = cargo_test_support::project()
.file("script.rs", script)
.build();
p.cargo("-Zscript -v script.rs")
.masquerade_as_nightly_cargo(&["script"])
.with_status(101)
.with_stdout_data(str![""])
.with_stderr_data(str![[r#"
[ERROR] invalid type: integer `3`, expected a version string like "0.9.8" or a detailed dependency like { version = "0.9.8" }
--> script.rs:9:7
|
9 | bar = 3
| ^
"#]])
.run();
}
#[cargo_test(nightly, reason = "-Zscript is unstable")] #[cargo_test(nightly, reason = "-Zscript is unstable")]
fn clean_output_with_edition() { fn clean_output_with_edition() {
let script = r#"#!/usr/bin/env cargo let script = r#"#!/usr/bin/env cargo

View File

@ -1,5 +1,5 @@
[ERROR] invalid multi-line basic string, expected `/`, characters [ERROR] invalid multi-line basic string, expected `/`, characters
--> script:8:2 --> script:10:2
| |
8 | 4␌+ 10 | 4␌+
| ^ | ^