Add checks for chars

2025-09-28 05:21:14 +00:00 · 2024-04-25 17:40:55 +02:00 · 2024-04-25 17:40:55 +02:00 · c6d0ba8dbc
commit c6d0ba8dbc
parent bd0bfaa95c
10 changed files with 265 additions and 4 deletions
--- a/askama_parser/src/lib.rs
+++ b/askama_parser/src/lib.rs
@ -8,7 +8,7 @@ use std::rc::Rc;
 use std::{fmt, str};

 use nom::branch::alt;
-use nom::bytes::complete::{escaped, is_not, tag, take_till};
+use nom::bytes::complete::{escaped, is_not, tag, take_till, take_while_m_n};
 use nom::character::complete::{anychar, char, one_of, satisfy};
 use nom::combinator::{cut, eof, map, opt, recognize};
 use nom::error::{Error, ErrorKind, FromExternalError};
@ -152,7 +152,8 @@ impl fmt::Display for ParseError {
    }
 }

-pub(crate) type ParseResult<'a, T = &'a str> = Result<(&'a str, T), nom::Err<ErrorContext<'a>>>;
+/// Error half of a [`ParseResult`]: a `nom::Err` wrapping an [`ErrorContext`].
+pub(crate) type ParseErr<'a> = nom::Err<ErrorContext<'a>>;
+/// Result of a parser: on success, a `&str` input slice paired with the parsed value `T`
+/// (`&str` by default); on failure, a [`ParseErr`].
+pub(crate) type ParseResult<'a, T = &'a str> = Result<(&'a str, T), ParseErr<'a>>;

 /// This type is used to handle `nom` errors and in particular to add custom error messages.
 /// It is used to generate `ParseError`.
@ -350,13 +351,110 @@ fn str_lit(i: &str) -> ParseResult<'_> {
    Ok((i, s.unwrap_or_default()))
 }

+/// Parses a character literal (`'…'`) and returns its content without the surrounding quotes.
+///
+/// The content must be exactly one character or one valid escape sequence. An empty literal, an
+/// unknown or malformed escape, or an escape whose value cannot be a Rust `char` (above the
+/// allowed maximum, or a surrogate) is reported as a `nom::Err::Failure` whose `input` is the
+/// start of the literal. Errors from the quote-delimited parser itself are propagated unchanged.
+// Information about allowed character escapes is available at:
+// <https://doc.rust-lang.org/reference/tokens.html#character-literals>.
 fn char_lit(i: &str) -> ParseResult<'_> {
+    let start = i;
    let (i, s) = delimited(
        char('\''),
        opt(escaped(is_not("\\\'"), '\\', anychar)),
        char('\''),
    )(i)?;
-    Ok((i, s.unwrap_or_default()))
+    let Some(s) = s else {
+        return Err(nom::Err::Failure(ErrorContext {
+            input: start,
+            // Same error as rustc.
+            message: Some(Cow::Borrowed("empty character literal")),
+        }));
+    };
+    let Ok(("", c)) = Char::parse(s) else {
+        return Err(nom::Err::Failure(ErrorContext {
+            input: start,
+            message: Some(Cow::Borrowed("invalid character")),
+        }));
+    };
+    let (nb, max_value, err1, err2) = match c {
+        Char::Literal | Char::Escaped => return Ok((i, s)),
+        Char::AsciiEscape(nb) => (
+            nb,
+            // `0x7F` is the maximum value for a `\x` escaped character.
+            0x7F,
+            "invalid character in ascii escape",
+            "must be a character in the range [\\x00-\\x7f]",
+        ),
+        Char::UnicodeEscape(nb) => (
+            nb,
+            // `0x10FFFF` is the maximum value for a `\u` escaped character.
+            0x10FFFF,
+            "invalid character in unicode escape",
+            "unicode escape must be at most 10FFFF",
+        ),
+    };
+
+    let Ok(nb) = u32::from_str_radix(nb, 16) else {
+        return Err(nom::Err::Failure(ErrorContext {
+            input: start,
+            message: Some(Cow::Borrowed(err1)),
+        }));
+    };
+    if nb > max_value {
+        return Err(nom::Err::Failure(ErrorContext {
+            input: start,
+            message: Some(Cow::Borrowed(err2)),
+        }));
+    }
+    // Surrogate code points (`D800..=DFFF`) are not Unicode scalar values, so they can never be
+    // a `char`. Only a unicode escape can reach this range: ascii escapes stop at `0x7F`.
+    if (0xD800..=0xDFFF).contains(&nb) {
+        return Err(nom::Err::Failure(ErrorContext {
+            input: start,
+            message: Some(Cow::Borrowed("unicode escape must not be a surrogate")),
+        }));
+    }
+    Ok((i, s))
+}
+
+/// Represents the different kinds of content a char literal can have, as classified by
+/// `Char::parse`. The escape variants borrow only the hexadecimal digits of the escape.
+enum Char<'a> {
+    /// Any single character that is not escaped.
+    Literal,
+    /// An escaped character (like `\n`) which doesn't require any extra check.
+    Escaped,
+    /// Ascii escape (like `\x12`); holds the two hexadecimal digits after `\x`.
+    AsciiEscape(&'a str),
+    /// Unicode escape (like `\u{12}`); holds the one to six hexadecimal digits between the braces.
+    UnicodeEscape(&'a str),
+}
+
+impl<'a> Char<'a> {
+    /// Classifies the content of a char literal (the text between the quotes).
+    ///
+    /// A one-character input is a [`Char::Literal`]; anything else has to start with a backslash
+    /// escape. For `\x` and `\u{…}` escapes only the hexadecimal digits are captured: their
+    /// numeric value is not checked here. Any input left after the escape is returned as the
+    /// remaining slice.
+    fn parse(i: &'a str) -> ParseResult<'a, Self> {
+        // Exactly one character: nothing to unescape.
+        let mut content = i.chars();
+        if content.next().is_some() && content.next().is_none() {
+            return Ok(("", Self::Literal));
+        }
+
+        let is_hex = |c: char| c.is_ascii_hexdigit();
+        // Single-character escapes. `\"` is not useful but supported by rust.
+        let simple = map(one_of("nrt\\0'\""), |_| Self::Escaped);
+        // `\xNN`: exactly two hexadecimal digits.
+        let ascii = map(
+            tuple((char('x'), take_while_m_n(2, 2, is_hex))),
+            |(_, digits)| Self::AsciiEscape(digits),
+        );
+        // `\u{N…}`: one to six hexadecimal digits between braces.
+        let unicode = map(
+            tuple((tag("u{"), take_while_m_n(1, 6, is_hex), char('}'))),
+            |(_, digits, _)| Self::UnicodeEscape(digits),
+        );
+
+        map(
+            tuple((char('\\'), alt((simple, ascii, unicode)))),
+            |(_, kind)| kind,
+        )(i)
+    }
 }

 enum PathOrIdentifier<'a> {
@ -552,7 +650,7 @@ fn strip_common(base: &Path, path: &Path) -> String {
 #[cfg(not(windows))]
 #[cfg(test)]
 mod test {
-    use super::{num_lit, strip_common};
+    use super::{char_lit, num_lit, strip_common};
    use std::path::Path;

    #[test]
@ -599,4 +697,38 @@ mod test {
        assert_eq!(num_lit("1_.").unwrap(), (".", "1_"));
        assert_eq!(num_lit("1_2.").unwrap(), (".", "1_2"));
    }
+
+    #[test]
+    fn test_char_lit() {
+        // Literals that must be accepted, paired with the content `char_lit` returns for them.
+        let accepted = [
+            ("'a'", "a"),
+            ("'字'", "字"),
+            // Escaped single characters.
+            ("'\\\"'", "\\\""),
+            ("'\\''", "\\'"),
+            ("'\\t'", "\\t"),
+            ("'\\n'", "\\n"),
+            ("'\\r'", "\\r"),
+            ("'\\0'", "\\0"),
+            // Escaped ascii characters (up to `0x7F`).
+            ("'\\x12'", "\\x12"),
+            ("'\\x02'", "\\x02"),
+            ("'\\x6a'", "\\x6a"),
+            ("'\\x7F'", "\\x7F"),
+            // Escaped unicode characters (up to `0x10FFFF`).
+            ("'\\u{A}'", "\\u{A}"),
+            ("'\\u{10}'", "\\u{10}"),
+            ("'\\u{aa}'", "\\u{aa}"),
+            ("'\\u{10FFFF}'", "\\u{10FFFF}"),
+        ];
+        for (input, content) in accepted {
+            assert_eq!(char_lit(input).unwrap(), ("", content));
+        }
+
+        // Literals that must be rejected.
+        let rejected = [
+            "''",
+            "'\\o'",
+            "'\\x'",
+            "'\\x1'",
+            "'\\x80'",
+            "'\\u'",
+            "'\\u{}'",
+            "'\\u{110000}'",
+        ];
+        for input in rejected {
+            assert!(char_lit(input).is_err(), "{input:?} should be rejected");
+        }
+    }
 }
--- a/testing/templates/char-literals/char-literal-1.txt
+++ b/testing/templates/char-literals/char-literal-1.txt
@ -0,0 +1 @@
+{% let s = '\a' %}
--- a/testing/templates/char-literals/char-literal-2.txt
+++ b/testing/templates/char-literals/char-literal-2.txt
@ -0,0 +1 @@
+{% let s = '\x' %}
--- a/testing/templates/char-literals/char-literal-3.txt
+++ b/testing/templates/char-literals/char-literal-3.txt
@ -0,0 +1 @@
+{% let s = '\x1' %}
--- a/testing/templates/char-literals/char-literal-4.txt
+++ b/testing/templates/char-literals/char-literal-4.txt
@ -0,0 +1 @@
+{% let s = '\x80' %}
--- a/testing/templates/char-literals/char-literal-5.txt
+++ b/testing/templates/char-literals/char-literal-5.txt
@ -0,0 +1 @@
+{% let s = '\u' %}
--- a/testing/templates/char-literals/char-literal-6.txt
+++ b/testing/templates/char-literals/char-literal-6.txt
@ -0,0 +1 @@
+{% let s = '\u{}' %}
--- a/testing/templates/char-literals/char-literal-7.txt
+++ b/testing/templates/char-literals/char-literal-7.txt
@ -0,0 +1 @@
+{% let s = '\u{110000}' %}
--- a/testing/tests/ui/char_literal.rs
+++ b/testing/tests/ui/char_literal.rs
@ -0,0 +1,36 @@
+use askama::Template;
+// Invalid char literals. `char_literal.stderr` cites line numbers here: do not shift lines.
+#[derive(Template)]
+#[template(path = "char-literals/char-literal-1.txt")]
+struct Err1; // `'\a'`: unknown escape
+
+#[derive(Template)]
+#[template(path = "char-literals/char-literal-2.txt")]
+struct Err2; // `'\x'`: no hexadecimal digits
+
+#[derive(Template)]
+#[template(path = "char-literals/char-literal-3.txt")]
+struct Err3; // `'\x1'`: only one hexadecimal digit
+
+#[derive(Template)]
+#[template(path = "char-literals/char-literal-4.txt")]
+struct Err4; // `'\x80'`: above `\x7f`
+
+#[derive(Template)]
+#[template(path = "char-literals/char-literal-5.txt")]
+struct Err5; // `'\u'`: no braces
+
+#[derive(Template)]
+#[template(path = "char-literals/char-literal-6.txt")]
+struct Err6; // `'\u{}'`: no hexadecimal digits
+
+#[derive(Template)]
+#[template(path = "char-literals/char-literal-7.txt")]
+struct Err7; // `'\u{110000}'`: above `10FFFF`
+
+#[derive(Template)]
+#[template(source = "{% let s = 'aaa' %}", ext = "html")]
+struct Err8; // more than one character between the quotes
+
+fn main() {
+}
--- a/testing/tests/ui/char_literal.stderr
+++ b/testing/tests/ui/char_literal.stderr
@ -0,0 +1,86 @@
+error: invalid character
+       failed to parse template source
+         --> testing/templates/char-literals/char-literal-1.txt:1:11
+       "'\\a' %}"
+ --> tests/ui/char_literal.rs:3:10
+  |
+3 | #[derive(Template)]
+  |          ^^^^^^^^
+  |
+  = note: this error originates in the derive macro `Template` (in Nightly builds, run with -Z macro-backtrace for more info)
+
+error: invalid character
+       failed to parse template source
+         --> testing/templates/char-literals/char-literal-2.txt:1:11
+       "'\\x' %}"
+ --> tests/ui/char_literal.rs:7:10
+  |
+7 | #[derive(Template)]
+  |          ^^^^^^^^
+  |
+  = note: this error originates in the derive macro `Template` (in Nightly builds, run with -Z macro-backtrace for more info)
+
+error: invalid character
+       failed to parse template source
+         --> testing/templates/char-literals/char-literal-3.txt:1:11
+       "'\\x1' %}"
+  --> tests/ui/char_literal.rs:11:10
+   |
+11 | #[derive(Template)]
+   |          ^^^^^^^^
+   |
+   = note: this error originates in the derive macro `Template` (in Nightly builds, run with -Z macro-backtrace for more info)
+
+error: must be a character in the range [\x00-\x7f]
+       failed to parse template source
+         --> testing/templates/char-literals/char-literal-4.txt:1:11
+       "'\\x80' %}"
+  --> tests/ui/char_literal.rs:15:10
+   |
+15 | #[derive(Template)]
+   |          ^^^^^^^^
+   |
+   = note: this error originates in the derive macro `Template` (in Nightly builds, run with -Z macro-backtrace for more info)
+
+error: invalid character
+       failed to parse template source
+         --> testing/templates/char-literals/char-literal-5.txt:1:11
+       "'\\u' %}"
+  --> tests/ui/char_literal.rs:19:10
+   |
+19 | #[derive(Template)]
+   |          ^^^^^^^^
+   |
+   = note: this error originates in the derive macro `Template` (in Nightly builds, run with -Z macro-backtrace for more info)
+
+error: invalid character
+       failed to parse template source
+         --> testing/templates/char-literals/char-literal-6.txt:1:11
+       "'\\u{}' %}"
+  --> tests/ui/char_literal.rs:23:10
+   |
+23 | #[derive(Template)]
+   |          ^^^^^^^^
+   |
+   = note: this error originates in the derive macro `Template` (in Nightly builds, run with -Z macro-backtrace for more info)
+
+error: unicode escape must be at most 10FFFF
+       failed to parse template source
+         --> testing/templates/char-literals/char-literal-7.txt:1:11
+       "'\\u{110000}' %}"
+  --> tests/ui/char_literal.rs:27:10
+   |
+27 | #[derive(Template)]
+   |          ^^^^^^^^
+   |
+   = note: this error originates in the derive macro `Template` (in Nightly builds, run with -Z macro-backtrace for more info)
+
+error: invalid character
+       failed to parse template source at row 1, column 11 near:
+       "'aaa' %}"
+  --> tests/ui/char_literal.rs:31:10
+   |
+31 | #[derive(Template)]
+   |          ^^^^^^^^
+   |
+   = note: this error originates in the derive macro `Template` (in Nightly builds, run with -Z macro-backtrace for more info)