Correct subspan for source = .. templates

This commit is contained in:
René Kijewski 2025-08-15 21:01:46 +02:00 committed by René Kijewski
parent 81252cd0a2
commit 29d62fef30
6 changed files with 356 additions and 39 deletions

View File

@ -6,7 +6,7 @@ use parser::node::{BlockDef, Macro};
use parser::{Node, Parsed, Span};
use crate::config::Config;
use crate::input::LiteralOrSpan;
use crate::spans::SourceSpan;
use crate::{CompileError, FileInfo, HashMap};
pub(crate) struct Heritage<'a, 'h> {
@ -47,7 +47,7 @@ pub(crate) struct Context<'a> {
pub(crate) imports: HashMap<&'a str, Arc<Path>>,
pub(crate) path: Option<&'a Path>,
pub(crate) parsed: &'a Parsed,
pub(crate) literal: Option<LiteralOrSpan>,
pub(crate) literal: Option<SourceSpan>,
pub(crate) template_span: proc_macro2::Span,
}
@ -70,7 +70,7 @@ impl<'a> Context<'a> {
config: &Config,
path: &'a Path,
parsed: &'a Parsed,
literal: Option<LiteralOrSpan>,
literal: Option<SourceSpan>,
template_span: proc_macro2::Span,
) -> Result<Self, CompileError> {
let mut extends = None;

View File

@ -1,45 +1,19 @@
use std::borrow::Cow;
use std::ops::Range;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;
use parser::node::Whitespace;
use parser::{Node, Parsed};
use proc_macro2::{Literal, Span};
use proc_macro2::Span;
use syn::punctuated::Punctuated;
use syn::spanned::Spanned;
use syn::{Attribute, Expr, ExprLit, ExprPath, Ident, Lit, LitBool, LitStr, Meta, Token};
use crate::config::{Config, SyntaxAndCache};
use crate::spans::SourceSpan;
use crate::{CompileError, FileInfo, HashMap, MsgValidEscapers};
#[derive(Clone, Debug)]
pub(crate) enum LiteralOrSpan {
Literal(Literal),
// TODO: transclude source file
Path(Span),
// TODO: implement for "code-in-doc"
#[cfg_attr(not(feature = "code-in-doc"), allow(dead_code))]
Span(Span),
}
impl LiteralOrSpan {
pub(crate) fn config_span(&self) -> Span {
match self {
LiteralOrSpan::Literal(literal) => literal.span(),
LiteralOrSpan::Path(span) | LiteralOrSpan::Span(span) => *span,
}
}
pub(crate) fn content_subspan(&self, bytes: Range<usize>) -> Option<Span> {
match self {
Self::Literal(lit) => lit.subspan(bytes),
Self::Path(_) | Self::Span(_) => None,
}
}
}
#[derive(Clone)]
pub(crate) struct TemplateInput<'a> {
pub(crate) ast: &'a syn::DeriveInput,
@ -47,7 +21,7 @@ pub(crate) struct TemplateInput<'a> {
pub(crate) config: &'a Config,
pub(crate) syntax: &'a SyntaxAndCache<'a>,
pub(crate) source: &'a Source,
pub(crate) source_span: Option<LiteralOrSpan>,
pub(crate) source_span: Option<SourceSpan>,
pub(crate) block: Option<(&'a str, Span)>,
#[cfg(feature = "blocks")]
pub(crate) blocks: &'a [Block],
@ -444,7 +418,7 @@ pub(crate) struct Block {
}
pub(crate) struct TemplateArgs {
pub(crate) source: (Source, Option<LiteralOrSpan>),
pub(crate) source: (Source, Option<SourceSpan>),
block: Option<(String, Span)>,
#[cfg(feature = "blocks")]
blocks: Vec<Block>,
@ -481,15 +455,15 @@ impl TemplateArgs {
#[cfg(feature = "external-sources")]
Some(PartialTemplateArgsSource::Path(s)) => (
Source::Path(s.value().into()),
Some(LiteralOrSpan::Path(s.span())),
),
Some(PartialTemplateArgsSource::Source(s)) => (
Source::Source(s.value().into()),
Some(LiteralOrSpan::Literal(s.token())),
Some(SourceSpan::Path(s.span())),
),
Some(PartialTemplateArgsSource::Source(s)) => {
let (source, span) = SourceSpan::from_source(s)?;
(Source::Source(source.into()), Some(span))
}
#[cfg(feature = "code-in-doc")]
Some(PartialTemplateArgsSource::InDoc(span, source)) => {
(source, Some(LiteralOrSpan::Span(span)))
(source, Some(SourceSpan::Span(span)))
}
None => {
return Err(CompileError::no_file_info(

View File

@ -11,6 +11,7 @@ mod heritage;
mod html;
mod input;
mod integration;
mod spans;
#[cfg(test)]
mod tests;

130
askama_derive/src/spans.rs Normal file
View File

@ -0,0 +1,130 @@
mod rustc_literal_escaper;
use std::ops::Range;
use proc_macro2::{Literal, Span};
use syn::LitStr;
use crate::CompileError;
use crate::spans::rustc_literal_escaper::unescape;
/// Span information for the origin of a template's source.
///
/// [`SourceSpan::config_span`] yields the span of the whole origin, and
/// [`SourceSpan::content_subspan`] a span inside the source text where that is available.
#[allow(private_interfaces)] // don't look behind the curtain
#[derive(Clone, Debug)]
pub(crate) enum SourceSpan {
/// The template was given inline as a string literal (`source = ".."`); offsets into the
/// template text can be mapped back into that literal.
Source(SpannedSource),
// TODO: transclude source file
/// The template was given as a path; only the span of the path literal is kept.
Path(Span),
// TODO: implement for "code-in-doc"
/// The template was found in a doc comment ("code-in-doc"); only the overall span is kept.
#[cfg_attr(not(feature = "code-in-doc"), allow(dead_code))]
Span(Span),
}
impl SourceSpan {
    /// Unescapes the string literal `source`.
    ///
    /// Returns the unescaped text together with a [`SourceSpan::Source`] that remembers how
    /// offsets in that text relate to the literal.
    ///
    /// # Errors
    ///
    /// Forwards the error of `SpannedSource::from_source` if the literal cannot be processed.
    pub(crate) fn from_source(source: LitStr) -> Result<(String, Self), CompileError> {
        SpannedSource::from_source(source).map(|(text, spanned)| (text, Self::Source(spanned)))
    }

    /// Returns the span that covers the whole origin of the template source.
    pub(crate) fn config_span(&self) -> Span {
        match self {
            Self::Path(span) | Self::Span(span) => *span,
            Self::Source(spanned) => spanned.config_span(),
        }
    }

    /// Returns the span of the byte range `bytes` of the template text.
    ///
    /// Only inline sources can be mapped; every other variant yields `None`.
    pub(crate) fn content_subspan(&self, bytes: Range<usize>) -> Option<Span> {
        match self {
            Self::Path(_) | Self::Span(_) => None,
            Self::Source(spanned) => spanned.content_subspan(bytes),
        }
    }
}
/// A string literal together with the table needed to translate byte offsets in its
/// unescaped content into byte offsets in the literal's token text.
#[derive(Clone, Debug)]
struct SpannedSource {
/// The literal token the source was read from.
literal: Literal,
/// `(offset in the unescaped source, offset in the token text)` pairs.
///
/// The first entry marks the start of the content (just after the opening quote), the
/// last entry marks its end (the closing quote). For escaped literals an additional entry
/// is recorded wherever the two offsets stop advancing in lockstep.
positions: Vec<(usize, usize)>,
}
impl SpannedSource {
    /// Returns the span of the whole literal token.
    fn config_span(&self) -> Span {
        self.literal.span()
    }

    /// Maps a byte range of the unescaped source onto a sub-span of the literal.
    ///
    /// Returns `None` if an offset cannot be translated, or if `Literal::subspan` itself
    /// returns `None`.
    fn content_subspan(&self, bytes: Range<usize>) -> Option<Span> {
        let start = self.find_position(bytes.start)?;
        let end = self.find_position(bytes.end)?;
        self.literal.subspan(start..end)
    }

    /// Translates a byte offset in the unescaped source into a byte offset in the literal's
    /// token text.
    ///
    /// The offset is resolved relative to the *last* entry of `positions` that starts at or
    /// before `position`. Several entries may share the same source offset (e.g. when the
    /// literal starts with a string continuation, or is empty); always picking the last one
    /// keeps the result well-defined, whereas a binary search may return any of the matches.
    ///
    /// Returns `None` if no entry starts at or before `position`. This cannot happen for
    /// values built by the constructors below, because they always record an entry for
    /// offset `0`.
    fn find_position(&self, position: usize) -> Option<usize> {
        let upper = self.positions.partition_point(|&(out, _)| out <= position);
        let &(start_out, start_in) = self.positions.get(upper.checked_sub(1)?)?;
        // `start_out <= position` is guaranteed by the partition predicate.
        Some(start_in + (position - start_out))
    }

    /// Unescapes `source` and records how offsets in the result map back into the literal.
    ///
    /// # Errors
    ///
    /// Returns a [`CompileError`] spanning `source` if the token text is not a well-formed
    /// (raw) string literal.
    fn from_source(source: LitStr) -> Result<(String, Self), CompileError> {
        let literal = source.token();
        let unparsed = literal.to_string();
        let result = if unparsed.starts_with('r') {
            Self::from_raw(&unparsed, literal)
        } else {
            Self::from_string(&unparsed, literal)
        };
        result.map_err(|msg| CompileError::no_file_info(msg, Some(source.span())))
    }

    /// Finds the byte range between the first and the last `"` in `unparsed`.
    ///
    /// Fails with `no_start` if there is no `"` at all, and with `no_end` if there is no
    /// second `"` that closes the content. Checking this up front keeps the later slicing
    /// from panicking on a malformed token.
    fn quoted_range(
        unparsed: &str,
        no_start: &'static str,
        no_end: &'static str,
    ) -> Result<Range<usize>, &'static str> {
        let start = unparsed.find('"').ok_or(no_start)? + 1;
        let end = unparsed.rfind('"').ok_or(no_end)?;
        if start > end {
            // Only a single quote was found: the literal is never closed.
            return Err(no_end);
        }
        Ok(start..end)
    }

    /// Handles raw string literals (`r".."`, `r#".."#`, ...).
    ///
    /// Raw strings contain no escapes, so the content is copied verbatim and the start and
    /// end entries are all that is needed.
    fn from_raw(unparsed: &str, literal: Literal) -> Result<(String, Self), &'static str> {
        let Range { start, end } = Self::quoted_range(
            unparsed,
            "raw string literal should contain `\"` at its start",
            "raw string literal should contain `\"` at its end",
        )?;
        let source = unparsed[start..end].to_owned();
        let span = Self {
            literal,
            positions: vec![(0, start), (source.len(), end)],
        };
        Ok((source, span))
    }

    /// Handles ordinary string literals, resolving their escape sequences.
    fn from_string(unparsed: &str, literal: Literal) -> Result<(String, Self), &'static str> {
        let Range { start, end } = Self::quoted_range(
            unparsed,
            "string literal should have `\"` at its start",
            "string literal should have `\"` at its end",
        )?;
        let unparsed = &unparsed[start..end];

        let mut source = String::with_capacity(unparsed.len());
        let mut positions = vec![(0, start)];
        // Offset in `unparsed` at which the next character is expected if input and output
        // are still advancing in lockstep.
        let mut expected_start = 0usize;
        unescape(unparsed, |range, c| {
            if range.start != expected_start {
                // An escape sequence or string continuation made the input run ahead of
                // the output: start a new mapping segment at this character.
                positions.push((source.len(), range.start + start));
                expected_start = range.start;
            }
            expected_start += c.len_utf8();
            source.push(c);
        })
        .map_err(|_| "input string literal should be well-formed")?;

        positions.push((source.len(), end));
        Ok((source, Self { literal, positions }))
    }
}

View File

@ -0,0 +1,185 @@
// The content of this file was copied and adapted from the project [`rustc-literal-escaper`] in
// revision [`425ca35`]. Please find the full list of contributors in [their revision history].
//
// License: Apache-2.0 OR MIT
// Authors: The Rust Project Developers, Guillaume Gomez, Marijn Schouten
//
// [`rustc-literal-escaper`]: <https://github.com/rust-lang/literal-escaper>
// [`425ca35`]: <https://github.com/rust-lang/literal-escaper/blob/425ca35a89d4ccb301bba7e2e59c5831bad0c303/src/lib.rs>
// [their revision history]: <https://github.com/rust-lang/literal-escaper/commits/425ca35a89d4ccb301bba7e2e59c5831bad0c303/src/lib.rs>
//! Utilities for validating (raw) string, char, and byte literals and
//! turning escape sequences into the values they represent.
use std::ops::Range;
use std::str::Chars;
/// Error returned by the unescaping functions in this module when the input is rejected.
///
/// It is a unit struct: it carries no information about what was wrong or where.
#[derive(Debug, Clone, Copy, Default)]
pub(crate) struct EscapeError;
/// Decodes a single escape sequence of a double-quoted string literal.
///
/// `chars` must be positioned directly behind the introducing backslash. On success the
/// iterator has been advanced past the whole escape sequence.
///
/// # Errors
///
/// Returns [`EscapeError`] if the input ends early, the escape is unknown, a `\x` escape is
/// malformed or not ASCII, or a `\u{..}` escape is malformed or not a valid `char`.
fn unescape_1(chars: &mut Chars<'_>) -> Result<char, EscapeError> {
    let marker = chars.next().ok_or(EscapeError)?;
    if marker == '0' {
        return Ok('\0');
    }
    match simple_escape(marker) {
        Ok(unescaped) => Ok(unescaped),
        Err('x') => hex2unit(hex_escape(chars)?),
        // `char::from_u32` rejects both surrogates and values above `char::MAX`.
        Err('u') => char::from_u32(unicode_escape(chars)?).ok_or(EscapeError),
        Err(_) => Err(EscapeError),
    }
}
/// Unescapes the content of a string literal.
///
/// `src` is the text between the quotes of a non-raw string literal. For every character
/// of the unescaped result, `callback` is invoked with the byte range in `src` that produced
/// it and the character itself. String continuations (backslash, newline and the following
/// whitespace) produce no character and hence no callback.
///
/// # Errors
///
/// Returns [`EscapeError`] on the first unescaped `"`, bare carriage return, or invalid
/// escape sequence. Characters before the error have already been passed to `callback`.
pub(crate) fn unescape(
    src: &str,
    mut callback: impl FnMut(Range<usize>, char),
) -> Result<(), EscapeError> {
    let mut chars = src.chars();
    loop {
        // Everything that is not left in `chars` has been consumed already.
        let start = src.len() - chars.as_str().len();
        let Some(c) = chars.next() else {
            return Ok(());
        };
        let unescaped = match c {
            '"' | '\r' => return Err(EscapeError),
            '\\' if chars.as_str().starts_with('\n') => {
                let _newline = chars.next();
                // skip whitespace for backslash newline, see [Rust language reference]
                // (https://doc.rust-lang.org/reference/tokens.html#string-literals).
                skip_ascii_whitespace(&mut chars)?;
                continue;
            }
            '\\' => unescape_1(&mut chars)?,
            verbatim => verbatim,
        };
        let end = src.len() - chars.as_str().len();
        callback(start..end, unescaped);
    }
}
/// Resolves a single-character escape other than `\0`.
///
/// `c` is the character that followed the backslash. Returns the character the escape
/// stands for, or hands `c` back as the error if it is not one of the simple escapes, so
/// that the caller can try the multi-character forms.
#[inline] // single use in `unescape_1`
fn simple_escape(c: char) -> Result<char, char> {
    let unescaped = match c {
        'n' => '\n',
        'r' => '\r',
        't' => '\t',
        // These stand for themselves.
        '"' | '\\' | '\'' => c,
        _ => return Err(c),
    };
    Ok(unescaped)
}
/// Reads the two hexadecimal digits of a `\x` escape.
///
/// `chars` must be positioned directly behind the `\x`. Exactly as many characters are
/// consumed as were inspected.
///
/// # Errors
///
/// Returns [`EscapeError`] if the input ends early or a character is not a hex digit.
#[inline] // single use in `unescape_1`
fn hex_escape(chars: &mut impl Iterator<Item = char>) -> Result<u8, EscapeError> {
    let mut next_digit = || {
        chars
            .next()
            .and_then(|c| c.to_digit(16))
            .ok_or(EscapeError)
    };
    let hi = next_digit()?;
    let lo = next_digit()?;
    // Two hex digits are at most 0xFF, so the conversion cannot actually fail.
    u8::try_from(hi * 16 + lo).map_err(|_| EscapeError)
}
/// Reads the braced part of a `\u{..}` escape.
///
/// `chars` must be positioned directly behind the `\u`. The braces must contain one to six
/// hexadecimal digits, optionally separated by underscores, and must not start with an
/// underscore. The returned value is *not* checked for being a valid `char`.
///
/// # Errors
///
/// Returns [`EscapeError`] if a brace is missing, no digit follows the opening brace, a
/// character is not a hex digit or underscore, or more than six digits are given.
#[inline] // single use in `unescape_1`
fn unicode_escape(chars: &mut impl Iterator<Item = char>) -> Result<u32, EscapeError> {
    const MAX_DIGITS: u32 = 6;

    if !matches!(chars.next(), Some('{')) {
        return Err(EscapeError);
    }

    // The first character must be a digit; this also rules out `_` and `}`.
    let first = chars.next().ok_or(EscapeError)?;
    let mut value = first.to_digit(16).ok_or(EscapeError)?;
    let mut digits = 1u32;

    for c in chars {
        match c {
            '_' => {}
            '}' => {
                // Syntax errors were reported while scanning, so an over-long escape
                // is only rejected once its closing brace has been seen.
                return if digits <= MAX_DIGITS {
                    Ok(value)
                } else {
                    Err(EscapeError)
                };
            }
            other => {
                let digit = other.to_digit(16).ok_or(EscapeError)?;
                digits += 1;
                // Once there are too many digits the value is known to be invalid;
                // stop accumulating so it cannot overflow.
                if digits <= MAX_DIGITS {
                    value = value * 16 + digit;
                }
            }
        }
    }

    // Input ended before the closing brace.
    Err(EscapeError)
}
/// Interpret a string continuation escape (https://doc.rust-lang.org/reference/expressions/literal-expr.html#string-continuation-escapes)
///
/// Skip ASCII whitespace, except for the formfeed character
/// (see [this issue](https://github.com/rust-lang/rust/issues/136600)).
/// Warns on unescaped newline and following non-ASCII whitespace.
#[inline] // single use in Unescape::unescape
fn skip_ascii_whitespace(chars: &mut Chars<'_>) -> Result<(), EscapeError> {
let rest = chars.as_str();
let first_non_space = rest
.bytes()
.position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
.unwrap_or(rest.len());
let (space, rest) = rest.split_at(first_non_space);
if space.contains('\n') {
return Err(EscapeError);
}
*chars = rest.chars();
if let Some(c) = chars.clone().next()
&& c.is_whitespace()
{
return Err(EscapeError);
}
Ok(())
}
/// Converts the value of a `\x` escape into a `char`.
///
/// In string literals only ASCII values are allowed.
///
/// # Errors
///
/// Returns [`EscapeError`] if `b` is not ASCII, i.e. greater than `0x7F`.
#[inline]
fn hex2unit(b: u8) -> Result<char, EscapeError> {
    match b {
        0x00..=0x7F => Ok(char::from(b)),
        _ => Err(EscapeError),
    }
}

View File

@ -8,6 +8,7 @@ use prettyplease::unparse;
use proc_macro2::TokenStream;
use quote::quote;
use similar::{Algorithm, ChangeTag, TextDiffConfig};
use syn::parse_quote;
use crate::integration::Buffer;
use crate::{AnyTemplateArgs, derive_template};
@ -1514,4 +1515,30 @@ fn regression_tests_span_change() {
&[],
11,
);
let _ = build_template(&parse_quote! {
#[template(source = "{{ \"x\" | ΔxΔyΔ }}", ext = "txt")]
struct Foo;
});
let _ = build_template(&parse_quote! {
#[template(source = r"{{ "x" | ΔxΔyΔ }}", ext = "txt")]
struct Foo;
});
let _ = build_template(&parse_quote! {
#[template(source = r#"{{ "x" | ΔxΔyΔ }}"#, ext = "txt")]
struct Foo;
});
let _ = build_template(&parse_quote! {
#[template(source = "{{ \"ΔxΔyΔ\" | x }}", ext = "txt")]
struct Foo;
});
let _ = build_template(&parse_quote! {
#[template(source = r"{{ "ΔxΔyΔ" | x }}", ext = "txt")]
struct Foo;
});
let _ = build_template(&parse_quote! {
#[template(source = r#"{{ "ΔxΔyΔ" | x }}"#, ext = "txt")]
struct Foo;
});
}