diff --git a/Cargo.lock b/Cargo.lock index c401a80c8..9adf1b1f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -399,6 +399,7 @@ dependencies = [ "url", "walkdir", "windows-sys 0.60.2", + "winnow", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 4684ef0ec..c2e4976cb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -118,6 +118,7 @@ url = "2.5.4" varisat = "0.2.2" walkdir = "2.5.0" windows-sys = "0.60" +winnow = "0.7.13" [workspace.lints.rust] rust_2018_idioms = "warn" # TODO: could this be removed? @@ -220,6 +221,7 @@ unicode-width.workspace = true unicode-xid.workspace = true url.workspace = true walkdir.workspace = true +winnow.workspace = true [target.'cfg(target_has_atomic = "64")'.dependencies] tracing-chrome.workspace = true diff --git a/src/cargo/util/frontmatter.rs b/src/cargo/util/frontmatter.rs index 022973035..75429b99e 100644 --- a/src/cargo/util/frontmatter.rs +++ b/src/cargo/util/frontmatter.rs @@ -1,41 +1,66 @@ use crate::CargoResult; +type Span = std::ops::Range; + #[derive(Debug)] pub struct ScriptSource<'s> { - shebang: Option<&'s str>, - info: Option<&'s str>, - frontmatter: Option<&'s str>, - content: &'s str, + /// The full file + raw: &'s str, + /// The `#!/usr/bin/env cargo` line, if present + shebang: Option, + /// The code fence opener (`---`) + open: Option, + /// Trailing text after `ScriptSource::open` that identifies the meaning of + /// `ScriptSource::frontmatter` + info: Option, + /// The lines between `ScriptSource::open` and `ScriptSource::close` + frontmatter: Option, + /// The code fence closer (`---`) + close: Option, + /// All content after the frontmatter and shebang + content: Span, } impl<'s> ScriptSource<'s> { - pub fn parse(input: &'s str) -> CargoResult { + pub fn parse(raw: &'s str) -> CargoResult { + use winnow::stream::FindSlice as _; + use winnow::stream::Location as _; + use winnow::stream::Offset as _; + use winnow::stream::Stream as _; + + let content_end = raw.len(); let mut source = Self { + raw, shebang: None, + open: None, info: None, frontmatter: None, - content: input, + close: None, + content: 0..content_end, }; - if let Some(shebang_end) = strip_shebang(source.content) { - let (shebang, content) = source.content.split_at(shebang_end); - source.shebang = Some(shebang); - source.content = content; + let mut input = winnow::stream::LocatingSlice::new(raw); + + if let Some(shebang_end) = strip_shebang(input.as_ref()) { + let shebang_start = input.current_token_start(); + let _ = input.next_slice(shebang_end); + let shebang_end = input.current_token_start(); + source.shebang = Some(shebang_start..shebang_end); + source.content = shebang_end..content_end; } - let mut rest = source.content; - // Whitespace may precede a frontmatter but must end with a newline - if let Some(nl_end) = strip_ws_lines(rest) { - rest = &rest[nl_end..]; + if let Some(nl_end) = strip_ws_lines(input.as_ref()) { + let _ = input.next_slice(nl_end); } // Opens with a line that starts with 3 or more `-` followed by an optional identifier const FENCE_CHAR: char = '-'; - let fence_length = rest + let fence_length = input + .as_ref() .char_indices() .find_map(|(i, c)| (c != FENCE_CHAR).then_some(i)) - .unwrap_or(rest.len()); + .unwrap_or_else(|| input.eof_offset()); match fence_length { 0 => { return Ok(source); @@ -48,39 +73,50 @@ impl<'s> ScriptSource<'s> { } _ => {} } - let (fence_pattern, rest) = rest.split_at(fence_length); - let Some(info_end_index) = rest.find('\n') else { + let open_start = input.current_token_start(); + let fence_pattern = input.next_slice(fence_length); + let open_end = input.current_token_start(); + source.open = Some(open_start..open_end); + let Some(info_nl) = input.find_slice("\n") else { anyhow::bail!("no closing `{fence_pattern}` found for frontmatter"); }; - let (info, rest) = rest.split_at(info_end_index); + let info = input.next_slice(info_nl.start); let info = info.trim_matches(is_whitespace); if !info.is_empty() { - source.info = Some(info); + let info_start = info.offset_from(&raw); + let info_end = info_start + info.len(); + source.info = Some(info_start..info_end); } // Ends with a line that starts with a matching number of `-` only followed by whitespace let nl_fence_pattern = format!("\n{fence_pattern}"); - let Some(frontmatter_nl) = rest.find(&nl_fence_pattern) else { + let Some(frontmatter_nl) = input.find_slice(nl_fence_pattern.as_str()) else { anyhow::bail!("no closing `{fence_pattern}` found for frontmatter"); }; - let frontmatter = &rest[..frontmatter_nl + 1]; - let frontmatter = frontmatter - .strip_prefix('\n') - .expect("earlier `found` + `split_at` left us here"); - source.frontmatter = Some(frontmatter); - let rest = &rest[frontmatter_nl + nl_fence_pattern.len()..]; + let frontmatter_start = input.current_token_start() + 1; // skip nl from infostring + let _ = input.next_slice(frontmatter_nl.start + 1); + let frontmatter_end = input.current_token_start(); + source.frontmatter = Some(frontmatter_start..frontmatter_end); + let close_start = input.current_token_start(); + let _ = input.next_slice(fence_length); + let close_end = input.current_token_start(); + source.close = Some(close_start..close_end); - let (after_closing_fence, rest) = rest.split_once("\n").unwrap_or((rest, "")); + let nl = input.find_slice("\n"); + let after_closing_fence = input.next_slice( + nl.map(|span| span.end) + .unwrap_or_else(|| input.eof_offset()), + ); + let content_start = input.current_token_start(); let after_closing_fence = after_closing_fence.trim_matches(is_whitespace); if !after_closing_fence.is_empty() { // extra characters beyond the original fence pattern, even if they are extra `-` anyhow::bail!("trailing characters found after frontmatter close"); } - let frontmatter_len = input.len() - rest.len(); - source.content = &input[frontmatter_len..]; + source.content = content_start..content_end; - let repeat = Self::parse(source.content)?; + let repeat = Self::parse(source.content())?; if repeat.frontmatter.is_some() { anyhow::bail!("only one frontmatter is supported"); } @@ -89,19 +125,43 @@ impl<'s> ScriptSource<'s> { } pub fn shebang(&self) -> Option<&'s str> { - self.shebang + self.shebang.clone().map(|span| &self.raw[span]) + } + + pub fn shebang_span(&self) -> Option { + self.shebang.clone() + } + + pub fn open_span(&self) -> Option { + self.open.clone() } pub fn info(&self) -> Option<&'s str> { - self.info + self.info.clone().map(|span| &self.raw[span]) + } + + pub fn info_span(&self) -> Option { + self.info.clone() } pub fn frontmatter(&self) -> Option<&'s str> { - self.frontmatter + self.frontmatter.clone().map(|span| &self.raw[span]) + } + + pub fn frontmatter_span(&self) -> Option { + self.frontmatter.clone() + } + + pub fn close_span(&self) -> Option { + self.close.clone() } pub fn content(&self) -> &'s str { - self.content + &self.raw[self.content.clone()] + } + + pub fn content_span(&self) -> Span { + self.content.clone() } } diff --git a/src/cargo/util/toml/embedded.rs b/src/cargo/util/toml/embedded.rs index bf8979524..61b698229 100644 --- a/src/cargo/util/toml/embedded.rs +++ b/src/cargo/util/toml/embedded.rs @@ -6,7 +6,7 @@ use crate::util::restricted_names; pub(super) fn expand_manifest(content: &str) -> CargoResult { let source = ScriptSource::parse(content)?; - if let Some(frontmatter) = source.frontmatter() { + if let Some(span) = source.frontmatter_span() { match source.info() { Some("cargo") | None => {} Some(other) => { @@ -22,10 +22,21 @@ pub(super) fn expand_manifest(content: &str) -> CargoResult { } } - Ok(frontmatter.to_owned()) + // Include from file start to frontmatter end when we parse the TOML to get line numbers + // correct and so if a TOML error says "entire file", it shows the existing content, rather + // than blank lines. + // + // HACK: Since frontmatter open isn't valid TOML, we insert a comment + let mut frontmatter = content[0..span.end].to_owned(); + let open_span = source.open_span().unwrap(); + frontmatter.insert(open_span.start, '#'); + Ok(frontmatter) } else { - let frontmatter = ""; - Ok(frontmatter.to_owned()) + // Consider the shebang to be part of the frontmatter + // so if a TOML error says "entire file", it shows the existing content, rather + // than blank lines. + let span = source.shebang_span().unwrap_or(0..0); + Ok(content[span].to_owned()) } } @@ -88,11 +99,12 @@ time="0.1.25" fn main() {} "# ), - str![[r#" + str![[r##" +#---cargo [dependencies] time="0.1.25" -"#]] +"##]] ); } } diff --git a/src/cargo/util/toml_mut/manifest.rs b/src/cargo/util/toml_mut/manifest.rs index 13e1776e7..de810f238 100644 --- a/src/cargo/util/toml_mut/manifest.rs +++ b/src/cargo/util/toml_mut/manifest.rs @@ -277,11 +277,11 @@ impl LocalManifest { let mut embedded = None; if is_embedded(path) { let source = ScriptSource::parse(&data)?; - if let Some(frontmatter) = source.frontmatter() { - embedded = Some(Embedded::exists(&data, frontmatter)); - data = frontmatter.to_owned(); - } else if let Some(shebang) = source.shebang() { - embedded = Some(Embedded::after(&data, shebang)); + if let Some(frontmatter) = source.frontmatter_span() { + embedded = Some(Embedded::exists(frontmatter)); + data = source.frontmatter().unwrap().to_owned(); + } else if let Some(shebang) = source.shebang_span() { + embedded = Some(Embedded::after(shebang)); data = String::new(); } else { embedded = Some(Embedded::start()); @@ -592,33 +592,15 @@ impl Embedded { Self::Implicit(0) } - fn after(input: &str, after: &str) -> Self { - let span = substr_span(input, after); - let end = span.end; - Self::Implicit(end) + fn after(after: std::ops::Range) -> Self { + Self::Implicit(after.end) } - fn exists(input: &str, exists: &str) -> Self { - let span = substr_span(input, exists); - Self::Explicit(span) + fn exists(exists: std::ops::Range) -> Self { + Self::Explicit(exists) } } -fn substr_span(haystack: &str, needle: &str) -> std::ops::Range { - let haystack_start_ptr = haystack.as_ptr(); - let haystack_end_ptr = haystack[haystack.len()..haystack.len()].as_ptr(); - - let needle_start_ptr = needle.as_ptr(); - let needle_end_ptr = needle[needle.len()..needle.len()].as_ptr(); - - assert!(needle_end_ptr < haystack_end_ptr); - assert!(haystack_start_ptr <= needle_start_ptr); - let start = needle_start_ptr as usize - haystack_start_ptr as usize; - let end = start + needle.len(); - - start..end -} - #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] enum DependencyStatus { None, diff --git a/tests/testsuite/script/cargo.rs b/tests/testsuite/script/cargo.rs index a7f23cfc2..41d548a07 100644 --- a/tests/testsuite/script/cargo.rs +++ b/tests/testsuite/script/cargo.rs @@ -182,6 +182,42 @@ fn requires_z_flag() { .run(); } +#[cargo_test(nightly, reason = "-Zscript is unstable")] +fn manifest_parse_error() { + // Exagerate the newlines to make it more obvious if the error's line number is off + let script = r#"#!/usr/bin/env cargo + + + + + +--- +[dependencies] +bar = 3 +--- + +fn main() { + println!("Hello world!"); +}"#; + let p = cargo_test_support::project() + .file("script.rs", script) + .build(); + + p.cargo("-Zscript -v script.rs") + .masquerade_as_nightly_cargo(&["script"]) + .with_status(101) + .with_stdout_data(str![""]) + .with_stderr_data(str![[r#" +[ERROR] invalid type: integer `3`, expected a version string like "0.9.8" or a detailed dependency like { version = "0.9.8" } + --> script.rs:9:7 + | +9 | bar = 3 + | ^ + +"#]]) + .run(); +} + #[cargo_test(nightly, reason = "-Zscript is unstable")] fn clean_output_with_edition() { let script = r#"#!/usr/bin/env cargo diff --git a/tests/testsuite/script/rustc_fixtures/frontmatter-contains-whitespace.stderr b/tests/testsuite/script/rustc_fixtures/frontmatter-contains-whitespace.stderr index fc85ccead..391a5e9f8 100644 --- a/tests/testsuite/script/rustc_fixtures/frontmatter-contains-whitespace.stderr +++ b/tests/testsuite/script/rustc_fixtures/frontmatter-contains-whitespace.stderr @@ -1,5 +1,5 @@ [ERROR] invalid multi-line basic string, expected `/`, characters - --> script:8:2 - | -8 | 4␌+ - | ^ + --> script:10:2 + | +10 | 4␌+ + | ^