diff --git a/askama_derive/src/heritage.rs b/askama_derive/src/heritage.rs index b381654d..64c366c0 100644 --- a/askama_derive/src/heritage.rs +++ b/askama_derive/src/heritage.rs @@ -6,7 +6,7 @@ use parser::node::{BlockDef, Macro}; use parser::{Node, Parsed, Span}; use crate::config::Config; -use crate::input::LiteralOrSpan; +use crate::spans::SourceSpan; use crate::{CompileError, FileInfo, HashMap}; pub(crate) struct Heritage<'a, 'h> { @@ -47,7 +47,7 @@ pub(crate) struct Context<'a> { pub(crate) imports: HashMap<&'a str, Arc>, pub(crate) path: Option<&'a Path>, pub(crate) parsed: &'a Parsed, - pub(crate) literal: Option, + pub(crate) literal: Option, pub(crate) template_span: proc_macro2::Span, } @@ -70,7 +70,7 @@ impl<'a> Context<'a> { config: &Config, path: &'a Path, parsed: &'a Parsed, - literal: Option, + literal: Option, template_span: proc_macro2::Span, ) -> Result { let mut extends = None; diff --git a/askama_derive/src/input.rs b/askama_derive/src/input.rs index 043ffd76..1ee8414a 100644 --- a/askama_derive/src/input.rs +++ b/askama_derive/src/input.rs @@ -1,45 +1,19 @@ use std::borrow::Cow; -use std::ops::Range; use std::path::{Path, PathBuf}; use std::str::FromStr; use std::sync::Arc; use parser::node::Whitespace; use parser::{Node, Parsed}; -use proc_macro2::{Literal, Span}; +use proc_macro2::Span; use syn::punctuated::Punctuated; use syn::spanned::Spanned; use syn::{Attribute, Expr, ExprLit, ExprPath, Ident, Lit, LitBool, LitStr, Meta, Token}; use crate::config::{Config, SyntaxAndCache}; +use crate::spans::SourceSpan; use crate::{CompileError, FileInfo, HashMap, MsgValidEscapers}; -#[derive(Clone, Debug)] -pub(crate) enum LiteralOrSpan { - Literal(Literal), - // TODO: transclude source file - Path(Span), - // TODO: implement for "code-in-doc" - #[cfg_attr(not(feature = "code-in-doc"), allow(dead_code))] - Span(Span), -} - -impl LiteralOrSpan { - pub(crate) fn config_span(&self) -> Span { - match self { - LiteralOrSpan::Literal(literal) => 
literal.span(), - LiteralOrSpan::Path(span) | LiteralOrSpan::Span(span) => *span, - } - } - - pub(crate) fn content_subspan(&self, bytes: Range) -> Option { - match self { - Self::Literal(lit) => lit.subspan(bytes), - Self::Path(_) | Self::Span(_) => None, - } - } -} - #[derive(Clone)] pub(crate) struct TemplateInput<'a> { pub(crate) ast: &'a syn::DeriveInput, @@ -47,7 +21,7 @@ pub(crate) struct TemplateInput<'a> { pub(crate) config: &'a Config, pub(crate) syntax: &'a SyntaxAndCache<'a>, pub(crate) source: &'a Source, - pub(crate) source_span: Option, + pub(crate) source_span: Option, pub(crate) block: Option<(&'a str, Span)>, #[cfg(feature = "blocks")] pub(crate) blocks: &'a [Block], @@ -444,7 +418,7 @@ pub(crate) struct Block { } pub(crate) struct TemplateArgs { - pub(crate) source: (Source, Option), + pub(crate) source: (Source, Option), block: Option<(String, Span)>, #[cfg(feature = "blocks")] blocks: Vec, @@ -481,15 +455,15 @@ impl TemplateArgs { #[cfg(feature = "external-sources")] Some(PartialTemplateArgsSource::Path(s)) => ( Source::Path(s.value().into()), - Some(LiteralOrSpan::Path(s.span())), - ), - Some(PartialTemplateArgsSource::Source(s)) => ( - Source::Source(s.value().into()), - Some(LiteralOrSpan::Literal(s.token())), + Some(SourceSpan::Path(s.span())), ), + Some(PartialTemplateArgsSource::Source(s)) => { + let (source, span) = SourceSpan::from_source(s)?; + (Source::Source(source.into()), Some(span)) + } #[cfg(feature = "code-in-doc")] Some(PartialTemplateArgsSource::InDoc(span, source)) => { - (source, Some(LiteralOrSpan::Span(span))) + (source, Some(SourceSpan::Span(span))) } None => { return Err(CompileError::no_file_info( diff --git a/askama_derive/src/lib.rs b/askama_derive/src/lib.rs index 8a932606..973b248d 100644 --- a/askama_derive/src/lib.rs +++ b/askama_derive/src/lib.rs @@ -11,6 +11,7 @@ mod heritage; mod html; mod input; mod integration; +mod spans; #[cfg(test)] mod tests; diff --git a/askama_derive/src/spans.rs 
b/askama_derive/src/spans.rs new file mode 100644 index 00000000..6b0faff9 --- /dev/null +++ b/askama_derive/src/spans.rs @@ -0,0 +1,130 @@ +mod rustc_literal_escaper; + +use std::ops::Range; + +use proc_macro2::{Literal, Span}; +use syn::LitStr; + +use crate::CompileError; +use crate::spans::rustc_literal_escaper::unescape; + +#[allow(private_interfaces)] // don't look behind the curtain +#[derive(Clone, Debug)] +pub(crate) enum SourceSpan { + Source(SpannedSource), + // TODO: transclude source file + Path(Span), + // TODO: implement for "code-in-doc" + #[cfg_attr(not(feature = "code-in-doc"), allow(dead_code))] + Span(Span), +} + +impl SourceSpan { + pub(crate) fn from_source(source: LitStr) -> Result<(String, Self), CompileError> { + let (source, span) = SpannedSource::from_source(source)?; + Ok((source, Self::Source(span))) + } + + pub(crate) fn config_span(&self) -> Span { + match self { + SourceSpan::Source(literal) => literal.config_span(), + SourceSpan::Path(span) | SourceSpan::Span(span) => *span, + } + } + + pub(crate) fn content_subspan(&self, bytes: Range) -> Option { + match self { + Self::Source(source) => source.content_subspan(bytes), + Self::Path(_) | Self::Span(_) => None, + } + } +} + +#[derive(Clone, Debug)] +struct SpannedSource { + literal: Literal, + positions: Vec<(usize, usize)>, +} + +impl SpannedSource { + fn config_span(&self) -> Span { + self.literal.span() + } + + fn content_subspan(&self, bytes: Range) -> Option { + let start = self.find_position(bytes.start); + let end = self.find_position(bytes.end); + self.literal.subspan(start..end) + } + + fn find_position(&self, position: usize) -> usize { + match self + .positions + .binary_search_by_key(&position, |&(pos, _)| pos) + { + Ok(idx) => self.positions[idx].1, + Err(idx) => { + let (start_out, start_in) = self.positions[idx - 1]; + start_in + (position - start_out) + } + } + } + + fn from_source(source: LitStr) -> Result<(String, Self), CompileError> { + let literal = source.token(); 
+ let unparsed = literal.to_string(); + let result = if unparsed.starts_with('r') { + Self::from_raw(&unparsed, literal) + } else { + Self::from_string(&unparsed, literal) + }; + result.map_err(|msg| CompileError::no_file_info(msg, Some(source.span()))) + } + + fn from_raw(unparsed: &str, literal: Literal) -> Result<(String, Self), &'static str> { + let start = unparsed + .find('"') + .ok_or("raw string literal should contain `\"` at its start")? + + 1; + let end = unparsed + .rfind('"') + .ok_or("raw string literal should contain `\"` at its end")?; + + let source = unparsed[start..end].to_owned(); + let span = Self { + literal, + positions: vec![(0, start), (source.len(), end)], + }; + Ok((source, span)) + } + + fn from_string(unparsed: &str, literal: Literal) -> Result<(String, Self), &'static str> { + let start = unparsed + .find('"') + .ok_or("string literal should have `\"` at its start")? + + 1; + let end = unparsed + .rfind('"') + .ok_or("string literal should have `\"` at its end")?; + let unparsed = &unparsed[start..end]; + + let mut source = String::with_capacity(unparsed.len()); + let mut positions = vec![(0, start)]; + let mut expected_start = 0usize; + let result = unescape(unparsed, |range, c| { + if range.start != expected_start { + positions.push((source.len(), range.start + start)); + expected_start = range.start; + } + expected_start += c.len_utf8(); + + source.push(c); + }); + if result.is_err() { + return Err("input string literal should be well-formed"); + } + + positions.push((source.len(), end)); + Ok((source, Self { literal, positions })) + } +} diff --git a/askama_derive/src/spans/rustc_literal_escaper.rs b/askama_derive/src/spans/rustc_literal_escaper.rs new file mode 100644 index 00000000..3d367095 --- /dev/null +++ b/askama_derive/src/spans/rustc_literal_escaper.rs @@ -0,0 +1,185 @@ +// The content of this file was copied and adapted from the project [`rustc-literal-escaper`] in +// revision [`425ca35`]. 
Please find the full list of contributors in [their revision history]. +// +// License: Apache-2.0 OR MIT +// Authors: The Rust Project Developers, Guillaume Gomez, Marijn Schouten +// +// [`rustc-literal-escaper`]: +// [`425ca35`]: +// [their revision history]: + +//! Utilities for validating (raw) string, char, and byte literals and +//! turning escape sequences into the values they represent. + +use std::ops::Range; +use std::str::Chars; + +#[derive(Debug, Clone, Copy, Default)] +pub(crate) struct EscapeError; + +/// Unescape the first unit of a string (double quoted syntax) +fn unescape_1(chars: &mut Chars<'_>) -> Result { + // Previous character was '\\', unescape what follows. + let c = chars.next().ok_or(EscapeError)?; + if c == '0' { + Ok('\0') + } else { + simple_escape(c).or_else(|c| match c { + 'x' => hex2unit(hex_escape(chars)?), + 'u' => { + let value = unicode_escape(chars)?; + if value > char::MAX as u32 { + Err(EscapeError) + } else { + char::from_u32(value).ok_or(EscapeError) + } + } + _ => Err(EscapeError), + }) + } +} + +/// Unescape a string literal +/// +/// Takes the contents of a non-raw string literal (without quotes) +/// and passes each unescaped `char`, with its byte range in `src`, to `callback`. +/// Returns `Err` as soon as the input turns out not to be well-formed. +pub(crate) fn unescape( + src: &str, + mut callback: impl FnMut(Range, char), +) -> Result<(), EscapeError> { + let mut chars = src.chars(); + while let Some(c) = chars.next() { + let start = src.len() - chars.as_str().len() - c.len_utf8(); + let res = match c { + '\\' => { + if let Some(b'\n') = chars.as_str().as_bytes().first() { + let _ = chars.next(); + // skip whitespace for backslash newline, see [Rust language reference] + // (https://doc.rust-lang.org/reference/tokens.html#string-literals). + skip_ascii_whitespace(&mut chars)?; + continue; + } else { + unescape_1(&mut chars)? 
+ } + } + '"' => return Err(EscapeError), + '\r' => return Err(EscapeError), + c => c, + }; + let end = src.len() - chars.as_str().len(); + callback(start..end, res); + } + Ok(()) +} + +/// Interpret a non-nul ASCII escape +/// +/// Parses the character of an ASCII escape (except nul) without the leading backslash. +#[inline] // single use in `unescape_1` +fn simple_escape(c: char) -> Result { + // Previous character was '\\', unescape what follows. + match c { + '"' => Ok('"'), + 'n' => Ok('\n'), + 'r' => Ok('\r'), + 't' => Ok('\t'), + '\\' => Ok('\\'), + '\'' => Ok('\''), + _ => Err(c), + } +} + +/// Interpret a hexadecimal escape +/// +/// Parses the two hexadecimal characters of a hexadecimal escape without the leading r"\x". +#[inline] // single use in `unescape_1` +fn hex_escape(chars: &mut impl Iterator) -> Result { + let hi = chars.next().ok_or(EscapeError)?; + let hi = hi.to_digit(16).ok_or(EscapeError)?; + + let lo = chars.next().ok_or(EscapeError)?; + let lo = lo.to_digit(16).ok_or(EscapeError)?; + + Ok((hi * 16 + lo) as u8) +} + +/// Interpret a unicode escape +/// +/// Parses the brace-enclosed hexadecimal digits (and underscores) of a unicode escape. +/// This r"{...}" normally comes after r"\u" and cannot start with an underscore. +#[inline] // single use in `unescape_1` +fn unicode_escape(chars: &mut impl Iterator) -> Result { + if chars.next() != Some('{') { + return Err(EscapeError); + } + + // First character must be a hexadecimal digit. + let mut value: u32 = match chars.next().ok_or(EscapeError)? { + '_' => return Err(EscapeError), + '}' => return Err(EscapeError), + c => c.to_digit(16).ok_or(EscapeError)?, + }; + + // First character is valid, now parse the rest of the number + // and closing brace. 
+ let mut n_digits = 1; + loop { + match chars.next() { + None => return Err(EscapeError), + Some('_') => continue, + Some('}') => { + // Incorrect syntax has higher priority for error reporting + // than unallowed value for a literal. + return if n_digits > 6 { + Err(EscapeError) + } else { + Ok(value) + }; + } + Some(c) => { + let digit: u32 = c.to_digit(16).ok_or(EscapeError)?; + n_digits += 1; + if n_digits > 6 { + // Stop updating value since we're sure that it's incorrect already. + continue; + } + value = value * 16 + digit; + } + }; + } +} + +/// Interpret a string continuation escape (https://doc.rust-lang.org/reference/expressions/literal-expr.html#string-continuation-escapes) +/// +/// Skip ASCII whitespace, except for the formfeed character +/// (see [this issue](https://github.com/rust-lang/rust/issues/136600)). +/// Fails on a further skipped newline and on directly following unskipped whitespace. +#[inline] // single use in `unescape` +fn skip_ascii_whitespace(chars: &mut Chars<'_>) -> Result<(), EscapeError> { + let rest = chars.as_str(); + let first_non_space = rest + .bytes() + .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r') + .unwrap_or(rest.len()); + let (space, rest) = rest.split_at(first_non_space); + if space.contains('\n') { + return Err(EscapeError); + } + *chars = rest.chars(); + if let Some(c) = chars.clone().next() + && c.is_whitespace() + { + return Err(EscapeError); + } + Ok(()) +} + +#[inline] +fn hex2unit(b: u8) -> Result { + if b.is_ascii() { + Ok(b as char) + } else { + Err(EscapeError) + } +} diff --git a/askama_derive/src/tests.rs b/askama_derive/src/tests.rs index 595b3823..82dd5f56 100644 --- a/askama_derive/src/tests.rs +++ b/askama_derive/src/tests.rs @@ -8,6 +8,7 @@ use prettyplease::unparse; use proc_macro2::TokenStream; use quote::quote; use similar::{Algorithm, ChangeTag, TextDiffConfig}; +use syn::parse_quote; use crate::integration::Buffer; use crate::{AnyTemplateArgs, derive_template}; @@ -1514,4 +1515,30 @@ fn 
regression_tests_span_change() { &[], 11, ); + + let _ = build_template(&parse_quote! { + #[template(source = "{{ \"x\" | ΔxΔyΔ }}", ext = "txt")] + struct Foo; + }); + let _ = build_template(&parse_quote! { + #[template(source = r"{{ "x" | ΔxΔyΔ }}", ext = "txt")] + struct Foo; + }); + let _ = build_template(&parse_quote! { + #[template(source = r#"{{ "x" | ΔxΔyΔ }}"#, ext = "txt")] + struct Foo; + }); + + let _ = build_template(&parse_quote! { + #[template(source = "{{ \"ΔxΔyΔ\" | x }}", ext = "txt")] + struct Foo; + }); + let _ = build_template(&parse_quote! { + #[template(source = r"{{ "ΔxΔyΔ" | x }}", ext = "txt")] + struct Foo; + }); + let _ = build_template(&parse_quote! { + #[template(source = r#"{{ "ΔxΔyΔ" | x }}"#, ext = "txt")] + struct Foo; + }); }