mirror of
https://github.com/rust-lang/cargo.git
synced 2025-09-25 11:14:46 +00:00
fix(embedded): Handle more parsing corner cases (#15187)
### What does this PR try to resolve?

This is part of #12207.

I found these while implementing frontmatter support within rustc. I'll likely do another pass when I finish rustc support to:

- Unify tests between cargo and rustc
- Improve error messages

### How should we test and review this PR?

### Additional information
This commit is contained in:
commit
3b784a42e3
@ -140,44 +140,28 @@ impl<'s> ScriptSource<'s> {
|
||||
content: input,
|
||||
};
|
||||
|
||||
// See rust-lang/rust's compiler/rustc_lexer/src/lib.rs's `strip_shebang`
|
||||
// Shebang must start with `#!` literally, without any preceding whitespace.
|
||||
// For simplicity we consider any line starting with `#!` a shebang,
|
||||
// regardless of restrictions put on shebangs by specific platforms.
|
||||
if let Some(rest) = source.content.strip_prefix("#!") {
|
||||
// Ok, this is a shebang but if the next non-whitespace token is `[`,
|
||||
// then it may be valid Rust code, so consider it Rust code.
|
||||
//
|
||||
// NOTE: rustc considers line and block comments to be whitespace but to avoid
|
||||
// any more awareness of Rust grammar, we are excluding it.
|
||||
if rest.trim_start().starts_with('[') {
|
||||
return Ok(source);
|
||||
}
|
||||
|
||||
// No other choice than to consider this a shebang.
|
||||
let newline_end = source
|
||||
.content
|
||||
.find('\n')
|
||||
.map(|pos| pos + 1)
|
||||
.unwrap_or(source.content.len());
|
||||
let (shebang, content) = source.content.split_at(newline_end);
|
||||
if let Some(shebang_end) = strip_shebang(source.content) {
|
||||
let (shebang, content) = source.content.split_at(shebang_end);
|
||||
source.shebang = Some(shebang);
|
||||
source.content = content;
|
||||
}
|
||||
|
||||
const FENCE_CHAR: char = '-';
|
||||
|
||||
let mut trimmed_content = source.content;
|
||||
while !trimmed_content.is_empty() {
|
||||
let c = trimmed_content;
|
||||
let c = c.trim_start_matches([' ', '\t']);
|
||||
let c = c.trim_start_matches(['\r', '\n']);
|
||||
if c == trimmed_content {
|
||||
let mut rest = source.content;
|
||||
while !rest.is_empty() {
|
||||
let without_spaces = rest.trim_start_matches([' ', '\t']);
|
||||
let without_nl = without_spaces.trim_start_matches(['\r', '\n']);
|
||||
if without_nl == rest {
|
||||
// nothing trimmed
|
||||
break;
|
||||
} else if without_nl == without_spaces {
|
||||
// frontmatter must come after a newline
|
||||
return Ok(source);
|
||||
}
|
||||
trimmed_content = c;
|
||||
rest = without_nl;
|
||||
}
|
||||
let fence_end = trimmed_content
|
||||
let fence_end = rest
|
||||
.char_indices()
|
||||
.find_map(|(i, c)| (c != FENCE_CHAR).then_some(i))
|
||||
.unwrap_or(source.content.len());
|
||||
@ -190,8 +174,9 @@ impl<'s> ScriptSource<'s> {
|
||||
"found {fence_end} `{FENCE_CHAR}` in rust frontmatter, expected at least 3"
|
||||
)
|
||||
}
|
||||
_ => trimmed_content.split_at(fence_end),
|
||||
_ => rest.split_at(fence_end),
|
||||
};
|
||||
let nl_fence_pattern = format!("\n{fence_pattern}");
|
||||
let (info, content) = rest.split_once("\n").unwrap_or((rest, ""));
|
||||
let info = info.trim();
|
||||
if !info.is_empty() {
|
||||
@ -199,11 +184,11 @@ impl<'s> ScriptSource<'s> {
|
||||
}
|
||||
source.content = content;
|
||||
|
||||
let Some((frontmatter, content)) = source.content.split_once(fence_pattern) else {
|
||||
let Some(frontmatter_nl) = source.content.find(&nl_fence_pattern) else {
|
||||
anyhow::bail!("no closing `{fence_pattern}` found for frontmatter");
|
||||
};
|
||||
source.frontmatter = Some(frontmatter);
|
||||
source.content = content;
|
||||
source.frontmatter = Some(&source.content[..frontmatter_nl + 1]);
|
||||
source.content = &source.content[frontmatter_nl + nl_fence_pattern.len()..];
|
||||
|
||||
let (line, content) = source
|
||||
.content
|
||||
@ -235,6 +220,26 @@ impl<'s> ScriptSource<'s> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the byte length of the shebang line at the start of `input`, if any.
///
/// Modeled on `strip_shebang` in rust-lang/rust's
/// `compiler/rustc_lexer/src/lib.rs`. Any first line that begins with a
/// literal `#!` (no leading whitespace allowed) counts as a shebang, without
/// applying platform-specific shebang restrictions.
///
/// The returned length includes the terminating `\n` when one is present, so
/// `input.split_at(len)` yields `(shebang, remaining_content)`. `None` means
/// the input does not start with a shebang.
fn strip_shebang(input: &str) -> Option<usize> {
    let after_marker = input.strip_prefix("#!")?;

    // `#!` followed by `[` (ignoring whitespace in between) may be valid Rust
    // code rather than a shebang, so leave it alone.
    //
    // NOTE: rustc also skips line and block comments here; that is left out on
    // purpose to avoid taking on more knowledge of Rust grammar.
    if after_marker.trim_start().starts_with('[') {
        return None;
    }

    // Otherwise the whole first line is the shebang. Without a newline the
    // shebang spans the entire input.
    let shebang_len = match input.find('\n') {
        Some(newline) => newline + 1,
        None => input.len(),
    };
    Some(shebang_len)
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test_expand {
|
||||
use snapbox::assert_data_eq;
|
||||
@ -466,6 +471,86 @@ fn main() {}
|
||||
);
|
||||
}
|
||||
|
||||
// Checks that fence lines preceded by indentation are not parsed as frontmatter:
// the expected snapshot has the shebang extracted, `info` and `frontmatter` both
// `None`, and the fence lines (with their leading whitespace) left in `content`.
#[test]
|
||||
fn split_indent() {
|
||||
assert_source(
|
||||
r#"#!/usr/bin/env cargo
|
||||
---
|
||||
[dependencies]
|
||||
time="0.1.25"
|
||||
----
|
||||
|
||||
fn main() {}
|
||||
"#,
|
||||
str![[r##"
|
||||
shebang: "#!/usr/bin/env cargo\n"
|
||||
info: None
|
||||
frontmatter: None
|
||||
content: " ---\n [dependencies]\n time=\"0.1.25\"\n ----\n\nfn main() {}\n"
|
||||
|
||||
"##]],
|
||||
|
||||
);
|
||||
}
|
||||
|
||||
// Checks that a longer outer fence (`-----`) lets shorter `---` lines appear
// inside the frontmatter body: the expected frontmatter is "---\n---\n" and the
// code after the closing `-----` fence becomes `content`.
#[test]
|
||||
fn split_escaped() {
|
||||
assert_source(
|
||||
r#"#!/usr/bin/env cargo
|
||||
-----
|
||||
---
|
||||
---
|
||||
-----
|
||||
|
||||
fn main() {}
|
||||
"#,
|
||||
str![[r##"
|
||||
shebang: "#!/usr/bin/env cargo\n"
|
||||
info: None
|
||||
frontmatter: "---\n---\n"
|
||||
content: "\nfn main() {}\n"
|
||||
|
||||
"##]],
|
||||
|
||||
);
|
||||
}
|
||||
|
||||
// Checks the inverse of `split_escaped`: with a `---` opening fence, longer
// `-----` lines in the body cannot be escaped. Parsing is expected to fail,
// reporting `--` as unexpected trailing content on the closing fence
// (presumably the extra dashes of the first `-----` line).
#[test]
|
||||
fn split_invalid_escaped() {
|
||||
assert_err(
|
||||
ScriptSource::parse(
|
||||
r#"#!/usr/bin/env cargo
|
||||
---
|
||||
-----
|
||||
-----
|
||||
---
|
||||
|
||||
fn main() {}
|
||||
"#,
|
||||
),
|
||||
str!["unexpected trailing content on closing fence: `--`"],
|
||||
);
|
||||
}
|
||||
|
||||
// Checks that dashes which do not start a line (`Hello---`) are not treated as
// the closing fence: the expected frontmatter is "Hello---\nWorld\n", ending at
// the `---` line that follows.
#[test]
|
||||
fn split_dashes_in_body() {
|
||||
assert_source(
|
||||
r#"#!/usr/bin/env cargo
|
||||
---
|
||||
Hello---
|
||||
World
|
||||
---
|
||||
|
||||
fn main() {}
|
||||
"#,
|
||||
str![[r##"
|
||||
shebang: "#!/usr/bin/env cargo\n"
|
||||
info: None
|
||||
frontmatter: "Hello---\nWorld\n"
|
||||
content: "\nfn main() {}\n"
|
||||
|
||||
"##]],
|
||||
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn split_mismatched_dashes() {
|
||||
assert_err(
|
||||
|
Loading…
x
Reference in New Issue
Block a user