diff --git a/askama_parser/src/lib.rs b/askama_parser/src/lib.rs index f6b239d7..4efe6e9d 100644 --- a/askama_parser/src/lib.rs +++ b/askama_parser/src/lib.rs @@ -20,11 +20,11 @@ use std::{fmt, str}; use winnow::ascii::take_escaped; use winnow::combinator::{ - alt, cut_err, delimited, fail, not, opt, peek, preceded, repeat, terminated, + alt, cut_err, delimited, empty, fail, not, opt, peek, preceded, repeat, terminated, }; use winnow::error::{ErrMode, FromExternalError}; -use winnow::stream::{AsChar, Stream as _}; -use winnow::token::{any, none_of, one_of, take_till, take_while}; +use winnow::stream::AsChar; +use winnow::token::{any, none_of, one_of, take_while}; use winnow::{ModalParser, Parser}; use crate::ascii_str::{AsciiChar, AsciiStr}; @@ -739,33 +739,45 @@ pub struct CharLit<'a> { // Information about allowed character escapes is available at: // <https://doc.rust-lang.org/reference/tokens.html#character-literals>. fn char_lit<'a>(i: &mut &'a str) -> ParseResult<'a, CharLit<'a>> { - let start = i.checkpoint(); - let (b_prefix, s) = ( - opt('b'), - delimited( - '\'', - opt(take_escaped(take_till(1.., ['\\', '\'']), '\\', any)), - '\'', - ), - ) - .parse_next(i)?; + let start = *i; - let Some(s) = s else { - i.reset(&start); - return cut_error!("empty character literal", *i); + let prefix = terminated( + alt(('b'.value(Some(CharPrefix::Binary)), empty.value(None))), + '\'', + ) + .parse_next(i)?; + + let content = opt(terminated( + opt(take_escaped(none_of(['\\', '\'']), '\\', any)), + '\'', + )) + .parse_next(i)?; + + let Some(content) = content else { + if let Some(prefix) = prefix { + return cut_error!( + match prefix { + CharPrefix::Binary => "unterminated byte constant", + }, + start, + ); + } else { + return fail(i); + } }; - let mut is = s; + let content = match content.unwrap_or_default() { + "" => return cut_error!("empty character literal", start), + content => content, + }; + + let mut is = content; let Ok(c) = Char::parse(&mut is) else { - i.reset(&start); - return cut_error!("invalid character", *i); + return 
cut_error!("invalid character", start); }; let (nb, max_value, err1, err2) = match c { Char::Literal | Char::Escaped => { - return Ok(CharLit { - prefix: b_prefix.map(|_| CharPrefix::Binary), - content: s, - }); + return Ok(CharLit { prefix, content }); } Char::AsciiEscape(nb) => ( nb, @@ -774,28 +786,33 @@ fn char_lit<'a>(i: &mut &'a str) -> ParseResult<'a, CharLit<'a>> { "invalid character in ascii escape", "must be a character in the range [\\x00-\\x7f]", ), - Char::UnicodeEscape(nb) => ( - nb, - // `0x10FFFF` is the maximum value for a `\u` escaped character. - 0x0010_FFFF, - "invalid character in unicode escape", - "unicode escape must be at most 10FFFF", - ), + Char::UnicodeEscape(nb) => { + match prefix { + Some(CharPrefix::Binary) => { + return cut_error!( + "cannot use unicode escape in byte string in byte literal", + start, + ); + } + None => ( + nb, + // `0x10FFFF` is the maximum value for a `\u` escaped character. + 0x0010_FFFF, + "invalid character in unicode escape", + "unicode escape must be at most 10FFFF", + ), + } + } }; let Ok(nb) = u32::from_str_radix(nb, 16) else { - i.reset(&start); - return cut_error!(err1, *i); + return cut_error!(err1, start); }; if nb > max_value { - i.reset(&start); - return cut_error!(err2, *i); + return cut_error!(err2, start); } - Ok(CharLit { - prefix: b_prefix.map(|_| CharPrefix::Binary), - content: s, - }) + Ok(CharLit { prefix, content }) } /// Represents the different kinds of char declarations: diff --git a/fuzzing/fuzz/artifacts/derive/clusterfuzz-testcase-minimized-derive-4955521990066176 b/fuzzing/fuzz/artifacts/derive/clusterfuzz-testcase-minimized-derive-4955521990066176 new file mode 100644 index 00000000..5c713448 --- /dev/null +++ b/fuzzing/fuzz/artifacts/derive/clusterfuzz-testcase-minimized-derive-4955521990066176 @@ -0,0 +1 @@ +ÿÿÿ{{a!(b'e)}} ÿlÿe \ No newline at end of file diff --git a/testing/tests/ui/char_literal.rs b/testing/tests/ui/char_literal.rs index 997b0968..96c1c289 100644 --- 
a/testing/tests/ui/char_literal.rs +++ b/testing/tests/ui/char_literal.rs @@ -32,5 +32,77 @@ struct Err7; #[template(source = "{% let s = 'aaa' %}", ext = "html")] struct Err8; +#[derive(Template)] +#[template(source = r#"{{ b'c }}"#, ext = "html")] +struct UnterminatedByteLiteral; + +#[derive(Template)] +#[template(source = r#"{{ b'' }}"#, ext = "html")] +struct EmptyByteLiteral; + +#[derive(Template)] +#[template(source = r#"{{ b'\u{}' }}"#, ext = "html")] +struct UnicodeEscapeInByteLiteralEmpty; + +#[derive(Template)] +#[template(source = r#"{{ b'\u{0}' }}"#, ext = "html")] +struct UnicodeEscapeInByteLiteralMinAscii; + +#[derive(Template)] +#[template(source = r#"{{ b'\u{42}' }}"#, ext = "html")] +struct UnicodeEscapeInByteLiteralRandomAscii; + +#[derive(Template)] +#[template(source = r#"{{ b'\u{7f}' }}"#, ext = "html")] +struct UnicodeEscapeInByteLiteralMaxAscii; + +#[derive(Template)] +#[template(source = r#"{{ b'\u{80}' }}"#, ext = "html")] +struct UnicodeEscapeInByteLiteralMinMultilingual; + +#[derive(Template)] +#[template(source = r#"{{ b'\u{1234}' }}"#, ext = "html")] +struct UnicodeEscapeInByteLiteralRandomMultilingual; + +#[derive(Template)] +#[template(source = r#"{{ b'\u{10ffff}' }}"#, ext = "html")] +struct UnicodeEscapeInByteLiteralMaxMultilingual; + +#[derive(Template)] +#[template(source = r#"{{ a!(b'c) }}"#, ext = "html")] +struct UnterminatedByteLiteralInMacro; + +#[derive(Template)] +#[template(source = r#"{{ b'' }}"#, ext = "html")] +struct EmptyByteLiteralInMacro; + +#[derive(Template)] +#[template(source = r#"{{ b'\u{}' }}"#, ext = "html")] +struct UnicodeEscapeInByteLiteralEmptyInMacro; + +#[derive(Template)] +#[template(source = r#"{{ a!(b'\u{0}') }}"#, ext = "html")] +struct UnicodeEscapeInByteLiteralMinAsciiInMacro; + +#[derive(Template)] +#[template(source = r#"{{ a!(b'\u{42}') }}"#, ext = "html")] +struct UnicodeEscapeInByteLiteralRandomAsciiInMacro; + +#[derive(Template)] +#[template(source = r#"{{ a!(b'\u{7f}') }}"#, ext = "html")] 
+struct UnicodeEscapeInByteLiteralMaxAsciiInMacro; + +#[derive(Template)] +#[template(source = r#"{{ a!(b'\u{80}') }}"#, ext = "html")] +struct UnicodeEscapeInByteLiteralMinMultilingualInMacro; + +#[derive(Template)] +#[template(source = r#"{{ a!(b'\u{1234}') }}"#, ext = "html")] +struct UnicodeEscapeInByteLiteralRandomMultilingualInMacro; + +#[derive(Template)] +#[template(source = r#"{{ a!(b'\u{10ffff}') }}"#, ext = "html")] +struct UnicodeEscapeInByteLiteralMaxMultilingualInMacro; + fn main() { } diff --git a/testing/tests/ui/char_literal.stderr b/testing/tests/ui/char_literal.stderr index 5a116fc2..ec3cd2be 100644 --- a/testing/tests/ui/char_literal.stderr +++ b/testing/tests/ui/char_literal.stderr @@ -61,3 +61,147 @@ error: invalid character | 32 | #[template(source = "{% let s = 'aaa' %}", ext = "html")] | ^^^^^^^^^^^^^^^^^^^^^ + +error: unterminated byte constant + --> <source attribute>:1:3 + "b'c }}" + --> tests/ui/char_literal.rs:36:21 + | +36 | #[template(source = r#"{{ b'c }}"#, ext = "html")] + | ^^^^^^^^^^^^^^ + +error: empty character literal + --> <source attribute>:1:3 + "b'' }}" + --> tests/ui/char_literal.rs:40:21 + | +40 | #[template(source = r#"{{ b'' }}"#, ext = "html")] + | ^^^^^^^^^^^^^^ + +error: invalid character + --> <source attribute>:1:3 + "b'\\u{}' }}" + --> tests/ui/char_literal.rs:44:21 + | +44 | #[template(source = r#"{{ b'\u{}' }}"#, ext = "html")] + | ^^^^^^^^^^^^^^^^^^ + +error: cannot use unicode escape in byte string in byte literal + --> <source attribute>:1:3 + "b'\\u{0}' }}" + --> tests/ui/char_literal.rs:48:21 + | +48 | #[template(source = r#"{{ b'\u{0}' }}"#, ext = "html")] + | ^^^^^^^^^^^^^^^^^^^ + +error: cannot use unicode escape in byte string in byte literal + --> <source attribute>:1:3 + "b'\\u{42}' }}" + --> tests/ui/char_literal.rs:52:21 + | +52 | #[template(source = r#"{{ b'\u{42}' }}"#, ext = "html")] + | ^^^^^^^^^^^^^^^^^^^^ + +error: cannot use unicode escape in byte string in byte literal + --> <source attribute>:1:3 + "b'\\u{7f}' }}" + --> tests/ui/char_literal.rs:56:21 + | +56 | #[template(source = r#"{{ 
b'\u{7f}' }}"#, ext = "html")] + | ^^^^^^^^^^^^^^^^^^^^ + +error: cannot use unicode escape in byte string in byte literal + --> <source attribute>:1:3 + "b'\\u{80}' }}" + --> tests/ui/char_literal.rs:60:21 + | +60 | #[template(source = r#"{{ b'\u{80}' }}"#, ext = "html")] + | ^^^^^^^^^^^^^^^^^^^^ + +error: cannot use unicode escape in byte string in byte literal + --> <source attribute>:1:3 + "b'\\u{1234}' }}" + --> tests/ui/char_literal.rs:64:21 + | +64 | #[template(source = r#"{{ b'\u{1234}' }}"#, ext = "html")] + | ^^^^^^^^^^^^^^^^^^^^^^ + +error: cannot use unicode escape in byte string in byte literal + --> <source attribute>:1:3 + "b'\\u{10ffff}' }}" + --> tests/ui/char_literal.rs:68:21 + | +68 | #[template(source = r#"{{ b'\u{10ffff}' }}"#, ext = "html")] + | ^^^^^^^^^^^^^^^^^^^^^^^^ + +error: unterminated byte constant + --> <source attribute>:1:6 + "b'c) }}" + --> tests/ui/char_literal.rs:72:21 + | +72 | #[template(source = r#"{{ a!(b'c) }}"#, ext = "html")] + | ^^^^^^^^^^^^^^^^^^ + +error: empty character literal + --> <source attribute>:1:3 + "b'' }}" + --> tests/ui/char_literal.rs:76:21 + | +76 | #[template(source = r#"{{ b'' }}"#, ext = "html")] + | ^^^^^^^^^^^^^^ + +error: invalid character + --> <source attribute>:1:3 + "b'\\u{}' }}" + --> tests/ui/char_literal.rs:80:21 + | +80 | #[template(source = r#"{{ b'\u{}' }}"#, ext = "html")] + | ^^^^^^^^^^^^^^^^^^ + +error: cannot use unicode escape in byte string in byte literal + --> <source attribute>:1:6 + "b'\\u{0}') }}" + --> tests/ui/char_literal.rs:84:21 + | +84 | #[template(source = r#"{{ a!(b'\u{0}') }}"#, ext = "html")] + | ^^^^^^^^^^^^^^^^^^^^^^^ + +error: cannot use unicode escape in byte string in byte literal + --> <source attribute>:1:6 + "b'\\u{42}') }}" + --> tests/ui/char_literal.rs:88:21 + | +88 | #[template(source = r#"{{ a!(b'\u{42}') }}"#, ext = "html")] + | ^^^^^^^^^^^^^^^^^^^^^^^^ + +error: cannot use unicode escape in byte string in byte literal + --> <source attribute>:1:6 + "b'\\u{7f}') }}" + --> tests/ui/char_literal.rs:92:21 + | +92 | #[template(source = r#"{{ a!(b'\u{7f}') }}"#, ext = "html")] + | ^^^^^^^^^^^^^^^^^^^^^^^^ + +error: 
cannot use unicode escape in byte string in byte literal + --> <source attribute>:1:6 + "b'\\u{80}') }}" + --> tests/ui/char_literal.rs:96:21 + | +96 | #[template(source = r#"{{ a!(b'\u{80}') }}"#, ext = "html")] + | ^^^^^^^^^^^^^^^^^^^^^^^^ + +error: cannot use unicode escape in byte string in byte literal + --> <source attribute>:1:6 + "b'\\u{1234}') }}" + --> tests/ui/char_literal.rs:100:21 + | +100 | #[template(source = r#"{{ a!(b'\u{1234}') }}"#, ext = "html")] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: cannot use unicode escape in byte string in byte literal + --> <source attribute>:1:6 + "b'\\u{10ffff}') }}" + --> tests/ui/char_literal.rs:104:21 + | +104 | #[template(source = r#"{{ a!(b'\u{10ffff}') }}"#, ext = "html")] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^