rust-analyzer/crates/parser/src/frontmatter.rs

// Copied from https://github.com/rust-lang/cargo/blob/367fd9f213750cd40317803dd0a5a3ce3f0c676d/src/cargo/util/frontmatter.rs
#![expect(dead_code)] // avoid editing
#![expect(unreachable_pub)] // avoid editing
#![expect(clippy::useless_format)] // avoid editing

/// Half-open range of byte offsets into the source text being parsed.
type Span = std::ops::Range<usize>;

/// A script file split into its shebang, frontmatter, and remaining content.
///
/// Every span is a byte range into `raw`. Optional parts that were not found are `None`.
#[derive(Debug)]
pub struct ScriptSource<'s> {
    /// The full file
    raw: &'s str,
    /// The `#!/usr/bin/env cargo` line, if present (up to and including its newline, if any)
    shebang: Option<Span>,
    /// The code fence opener (`---`); covers only the run of `-` characters
    open: Option<Span>,
    /// Trailing text after `ScriptSource::open` that identifies the meaning of
    /// `ScriptSource::frontmatter`; surrounding spaces/tabs are excluded and an empty
    /// infostring is stored as `None`
    info: Option<Span>,
    /// The lines between `ScriptSource::open` and `ScriptSource::close`; starts after the
    /// newline that ends the opening line and ends after the newline preceding the closer
    frontmatter: Option<Span>,
    /// The code fence closer (`---`); covers only the run of `-` characters
    close: Option<Span>,
    /// All content after the frontmatter and shebang
    content: Span,
}

impl<'s> ScriptSource<'s> {
    /// Splits `raw` into an optional shebang line, an optional frontmatter block, and the
    /// remaining content.
    ///
    /// Returns `Ok` without a frontmatter when, after the shebang and any whitespace-only
    /// lines, the text does not start with `-`.
    ///
    /// # Errors
    ///
    /// Returns a [`FrontmatterError`] when:
    /// - the opening fence has only 1 or 2 `-`,
    /// - the opening line is not terminated by a newline or no closing fence is found,
    /// - the closing fence has fewer or more `-` than the opening fence,
    /// - anything other than spaces/tabs follows the closing fence on its line,
    /// - another run of `-` follows the frontmatter (only one frontmatter is supported).
    pub fn parse(raw: &'s str) -> Result<Self, FrontmatterError> {
        use winnow::stream::FindSlice as _;
        use winnow::stream::Location as _;
        use winnow::stream::Offset as _;
        use winnow::stream::Stream as _;

        let content_end = raw.len();
        let mut source = Self {
            raw,
            shebang: None,
            open: None,
            info: None,
            frontmatter: None,
            close: None,
            content: 0..content_end,
        };

        // `input` is a cursor over `raw`; `current_token_start()` reports its byte offset.
        let mut input = winnow::stream::LocatingSlice::new(raw);

        if let Some(shebang_end) = strip_shebang(input.as_ref()) {
            let shebang_start = input.current_token_start();
            let _ = input.next_slice(shebang_end);
            let shebang_end = input.current_token_start();
            source.shebang = Some(shebang_start..shebang_end);
            source.content = shebang_end..content_end;
        }

        // Whitespace may precede a frontmatter but must end with a newline
        if let Some(nl_end) = strip_ws_lines(input.as_ref()) {
            let _ = input.next_slice(nl_end);
        }

        // Opens with a line that starts with 3 or more `-` followed by an optional identifier
        const FENCE_CHAR: char = '-';
        let fence_length = input
            .as_ref()
            .char_indices()
            .find_map(|(i, c)| (c != FENCE_CHAR).then_some(i))
            .unwrap_or_else(|| input.eof_offset());
        let open_start = input.current_token_start();
        let fence_pattern = input.next_slice(fence_length);
        let open_end = input.current_token_start();
        match fence_length {
            0 => {
                // No fence at all: the file has no frontmatter.
                return Ok(source);
            }
            1 | 2 => {
                // either not a frontmatter or invalid frontmatter opening
                return Err(FrontmatterError::new(
                    format!(
                        "found {fence_length} `{FENCE_CHAR}` in rust frontmatter, expected at least 3"
                    ),
                    raw.len()..raw.len(),
                ).push_visible_span(open_start..open_end));
            }
            _ => {}
        }
        source.open = Some(open_start..open_end);
        let Some(info_nl) = input.find_slice("\n") else {
            return Err(FrontmatterError::new(
                format!("unclosed frontmatter; expected `{fence_pattern}`"),
                raw.len()..raw.len(),
            )
            .push_visible_span(open_start..open_end));
        };
        // The cursor stops *before* the newline so that the closing-fence search below can
        // match a fence on the very next line.
        let info = input.next_slice(info_nl.start);
        let info = info.strip_suffix('\r').unwrap_or(info); // already excludes `\n`
        let info = info.trim_matches(is_horizontal_whitespace);
        if !info.is_empty() {
            let info_start = info.offset_from(&raw);
            let info_end = info_start + info.len();
            source.info = Some(info_start..info_end);
        }

        // Ends with a line that starts with a matching number of `-` only followed by whitespace
        let nl_fence_pattern = format!("\n{fence_pattern}");
        let Some(frontmatter_nl) = input.find_slice(nl_fence_pattern.as_str()) else {
            // No full-length closer exists; look for a line starting with a shorter run of
            // `-`, trying the longest prefixes first, to give a more specific error.
            for len in (2..(nl_fence_pattern.len() - 1)).rev() {
                let Some(frontmatter_nl) = input.find_slice(&nl_fence_pattern[0..len]) else {
                    continue;
                };
                let _ = input.next_slice(frontmatter_nl.start + 1);
                let close_start = input.current_token_start();
                // Measure the dashes actually present on this line. The search pattern only
                // guarantees `len - 1` of them, so consuming `len` bytes could run past the
                // end of the input or split a multi-byte character (both panic in
                // `next_slice`) and would misreport the number of missing dashes.
                // The count is always below `fence_length`, otherwise the full pattern
                // would have matched above.
                let close_len =
                    input.as_ref().chars().take_while(|c| *c == FENCE_CHAR).count();
                let _ = input.next_slice(close_len);
                let close_end = input.current_token_start();
                let fewer_dashes = fence_length - close_len;
                return Err(FrontmatterError::new(
                    format!(
                        "closing code fence has {fewer_dashes} less `-` than the opening fence"
                    ),
                    close_start..close_end,
                )
                .push_visible_span(open_start..open_end));
            }
            return Err(FrontmatterError::new(
                format!("unclosed frontmatter; expected `{fence_pattern}`"),
                raw.len()..raw.len(),
            )
            .push_visible_span(open_start..open_end));
        };
        let frontmatter_start = input.current_token_start() + 1; // skip nl from infostring
        let _ = input.next_slice(frontmatter_nl.start + 1);
        let frontmatter_end = input.current_token_start();
        source.frontmatter = Some(frontmatter_start..frontmatter_end);
        let close_start = input.current_token_start();
        let _ = input.next_slice(fence_length);
        let close_end = input.current_token_start();
        source.close = Some(close_start..close_end);

        // Consume the rest of the closing line (through its newline, or to end of input).
        let nl = input.find_slice("\n");
        let after_closing_fence =
            input.next_slice(nl.map(|span| span.end).unwrap_or_else(|| input.eof_offset()));
        let content_start = input.current_token_start();
        let extra_dashes = after_closing_fence.chars().take_while(|b| *b == FENCE_CHAR).count();
        if 0 < extra_dashes {
            let extra_start = close_end;
            let extra_end = extra_start + extra_dashes;
            return Err(FrontmatterError::new(
                format!("closing code fence has {extra_dashes} more `-` than the opening fence"),
                extra_start..extra_end,
            )
            .push_visible_span(open_start..open_end));
        } else {
            let after_closing_fence = strip_newline(after_closing_fence);
            let after_closing_fence = after_closing_fence.trim_matches(is_horizontal_whitespace);
            if !after_closing_fence.is_empty() {
                // extra characters beyond the original fence pattern
                let after_start = after_closing_fence.offset_from(&raw);
                let after_end = after_start + after_closing_fence.len();
                return Err(FrontmatterError::new(
                    format!("unexpected characters after frontmatter close"),
                    after_start..after_end,
                )
                .push_visible_span(open_start..open_end));
            }
        }

        source.content = content_start..content_end;

        // Reject a second frontmatter: after any whitespace-only lines, the content must
        // not start with `-`.
        if let Some(nl_end) = strip_ws_lines(input.as_ref()) {
            let _ = input.next_slice(nl_end);
        }
        let fence_length = input
            .as_ref()
            .char_indices()
            .find_map(|(i, c)| (c != FENCE_CHAR).then_some(i))
            .unwrap_or_else(|| input.eof_offset());
        if 0 < fence_length {
            let fence_start = input.current_token_start();
            let fence_end = fence_start + fence_length;
            return Err(FrontmatterError::new(
                format!("only one frontmatter is supported"),
                fence_start..fence_end,
            )
            .push_visible_span(open_start..open_end)
            .push_visible_span(close_start..close_end));
        }

        Ok(source)
    }

    /// The text of the shebang line, if one was found.
    pub fn shebang(&self) -> Option<&'s str> {
        self.shebang.clone().map(|span| &self.raw[span])
    }

    /// Byte range of the shebang line, if one was found.
    pub fn shebang_span(&self) -> Option<Span> {
        self.shebang.clone()
    }

    /// Byte range of the opening fence's `-` characters, if a frontmatter was found.
    pub fn open_span(&self) -> Option<Span> {
        self.open.clone()
    }

    /// The infostring following the opening fence, if it is non-empty.
    pub fn info(&self) -> Option<&'s str> {
        self.info.clone().map(|span| &self.raw[span])
    }

    /// Byte range of the infostring, if it is non-empty.
    pub fn info_span(&self) -> Option<Span> {
        self.info.clone()
    }

    /// The text between the opening and closing fence lines, if a frontmatter was found.
    pub fn frontmatter(&self) -> Option<&'s str> {
        self.frontmatter.clone().map(|span| &self.raw[span])
    }

    /// Byte range of the frontmatter body, if a frontmatter was found.
    pub fn frontmatter_span(&self) -> Option<Span> {
        self.frontmatter.clone()
    }

    /// Byte range of the closing fence's `-` characters, if a frontmatter was found.
    pub fn close_span(&self) -> Option<Span> {
        self.close.clone()
    }

    /// Everything after the shebang and frontmatter.
    pub fn content(&self) -> &'s str {
        &self.raw[self.content.clone()]
    }

    /// Byte range of everything after the shebang and frontmatter.
    pub fn content_span(&self) -> Span {
        self.content.clone()
    }
}

/// Returns the byte index just past the shebang line, or `None` when `input` does not
/// start with a shebang.
pub fn strip_shebang(input: &str) -> Option<usize> {
    // Mirrors `strip_shebang` in rust-lang/rust's compiler/rustc_lexer/src/lib.rs.
    // A shebang must begin with a literal `#!` with nothing in front of it; platform
    // specific restrictions on shebangs are deliberately ignored.
    let after_marker = input.strip_prefix("#!")?;

    // `#!` followed (after whitespace) by `[` may be an inner attribute, i.e. valid Rust,
    // so it is treated as code rather than a shebang.
    //
    // NOTE: rustc also skips line and block comments here; that is not replicated to
    // avoid taking on more knowledge of the Rust grammar.
    if after_marker.trim_start().starts_with('[') {
        return None;
    }

    // The shebang runs through the first newline, or to the end when there is none.
    let line_end = match input.find('\n') {
        Some(newline) => newline + 1,
        None => input.len(),
    };
    Some(line_end)
}

/// Returns the byte index just past the last newline inside the leading run of whitespace,
/// or `None` when `input` does not start with at least one whitespace-only line.
pub fn strip_ws_lines(input: &str) -> Option<usize> {
    // Isolate the leading whitespace; bail out early when there is none.
    let leading_ws = match input.find(|c| !is_whitespace(c)) {
        Some(0) => return None,
        Some(first_non_ws) => &input[..first_non_ws],
        None => input,
    };

    // Only complete lines are stripped, so cut after the final newline in that run.
    leading_ws.rfind('\n').map(|newline| newline + 1)
}

/// Reports whether `c` counts as whitespace under the Rust language definition.
/// The character classes are described in the
/// [Rust language reference](https://doc.rust-lang.org/reference/whitespace.html).
fn is_whitespace(c: char) -> bool {
    // This is the Pattern_White_Space set. It is stable across Unicode versions, so the
    // members are hard-coded.
    match c {
        // tab, line feed, vertical tab, form feed, carriage return (contiguous)
        '\u{0009}'..='\u{000D}' => true,
        // space
        '\u{0020}' => true,
        // next line (from latin1)
        '\u{0085}' => true,
        // LEFT-TO-RIGHT MARK, RIGHT-TO-LEFT MARK
        '\u{200E}' | '\u{200F}' => true,
        // LINE SEPARATOR, PARAGRAPH SEPARATOR
        '\u{2028}' | '\u{2029}' => true,
        _ => false,
    }
}

/// Reports whether `c` is a horizontal whitespace character, i.e. a tab or a space.
fn is_horizontal_whitespace(c: char) -> bool {
    // Only the two horizontal members of Rust's whitespace set: tab (U+0009) and
    // space (U+0020).
    c == '\u{0009}' || c == '\u{0020}'
}

/// Removes one trailing line terminator (`\r\n` or `\n`) from `text`, if present.
fn strip_newline(text: &str) -> &str {
    let Some(without_lf) = text.strip_suffix('\n') else {
        return text;
    };
    // A `\r` directly before the removed `\n` belongs to the same terminator.
    without_lf.strip_suffix('\r').unwrap_or(without_lf)
}

/// An error from frontmatter parsing, carrying a message and the source spans it refers to.
#[derive(Debug)]
pub struct FrontmatterError {
    /// Human-readable description; this is what the `Display` impl prints
    message: String,
    /// The span passed to `FrontmatterError::new`
    primary_span: Span,
    /// Spans added through `FrontmatterError::push_visible_span`, in insertion order
    visible_spans: Vec<Span>,
}

impl FrontmatterError {
    /// Builds an error with the given message and primary span, and no visible spans.
    pub fn new(message: impl Into<String>, span: Span) -> Self {
        let message = message.into();
        Self { message, primary_span: span, visible_spans: vec![] }
    }

    /// Appends `span` to the visible spans and hands the error back for chaining.
    pub fn push_visible_span(self, span: Span) -> Self {
        let mut visible_spans = self.visible_spans;
        visible_spans.push(span);
        Self { visible_spans, ..self }
    }

    /// The error message.
    pub fn message(&self) -> &str {
        &self.message
    }

    /// A copy of the primary span.
    pub fn primary_span(&self) -> Span {
        self.primary_span.start..self.primary_span.end
    }

    /// The visible spans, in the order they were pushed.
    pub fn visible_spans(&self) -> &[Span] {
        self.visible_spans.as_slice()
    }
}

impl std::fmt::Display for FrontmatterError {
    /// Writes only the message, delegating to the string's own `Display` impl.
    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt(self.message.as_str(), fmt)
    }
}

// Uses the default `Error` methods; the user-facing text comes from the `Display` impl.
impl std::error::Error for FrontmatterError {}