parser: fail on unterminated byte literal

Resolves <https://issues.oss-fuzz.com/issues/433650278>.

Also, I noticed that a Unicode escape in a byte literal, e.g.
`b'\u{42}'`, was accepted when it should not have been.
This commit is contained in:
René Kijewski 2025-07-28 19:51:55 +02:00 committed by René Kijewski
parent ec17ed4813
commit d670d9b91c
4 changed files with 272 additions and 38 deletions

View File

@ -20,11 +20,11 @@ use std::{fmt, str};
use winnow::ascii::take_escaped;
use winnow::combinator::{
alt, cut_err, delimited, fail, not, opt, peek, preceded, repeat, terminated,
alt, cut_err, delimited, empty, fail, not, opt, peek, preceded, repeat, terminated,
};
use winnow::error::{ErrMode, FromExternalError};
use winnow::stream::{AsChar, Stream as _};
use winnow::token::{any, none_of, one_of, take_till, take_while};
use winnow::stream::AsChar;
use winnow::token::{any, none_of, one_of, take_while};
use winnow::{ModalParser, Parser};
use crate::ascii_str::{AsciiChar, AsciiStr};
@ -739,33 +739,45 @@ pub struct CharLit<'a> {
// Information about allowed character escapes is available at:
// <https://doc.rust-lang.org/reference/tokens.html#character-literals>.
fn char_lit<'a>(i: &mut &'a str) -> ParseResult<'a, CharLit<'a>> {
let start = i.checkpoint();
let (b_prefix, s) = (
opt('b'),
delimited(
'\'',
opt(take_escaped(take_till(1.., ['\\', '\'']), '\\', any)),
'\'',
),
)
.parse_next(i)?;
let start = *i;
let Some(s) = s else {
i.reset(&start);
return cut_error!("empty character literal", *i);
let prefix = terminated(
alt(('b'.value(Some(CharPrefix::Binary)), empty.value(None))),
'\'',
)
.parse_next(i)?;
let content = opt(terminated(
opt(take_escaped(none_of(['\\', '\'']), '\\', any)),
'\'',
))
.parse_next(i)?;
let Some(content) = content else {
if let Some(prefix) = prefix {
return cut_error!(
match prefix {
CharPrefix::Binary => "unterminated byte constant",
},
start,
);
} else {
return fail(i);
}
};
let mut is = s;
let content = match content.unwrap_or_default() {
"" => return cut_error!("empty character literal", start),
content => content,
};
let mut is = content;
let Ok(c) = Char::parse(&mut is) else {
i.reset(&start);
return cut_error!("invalid character", *i);
return cut_error!("invalid character", start);
};
let (nb, max_value, err1, err2) = match c {
Char::Literal | Char::Escaped => {
return Ok(CharLit {
prefix: b_prefix.map(|_| CharPrefix::Binary),
content: s,
});
return Ok(CharLit { prefix, content });
}
Char::AsciiEscape(nb) => (
nb,
@ -774,28 +786,33 @@ fn char_lit<'a>(i: &mut &'a str) -> ParseResult<'a, CharLit<'a>> {
"invalid character in ascii escape",
"must be a character in the range [\\x00-\\x7f]",
),
Char::UnicodeEscape(nb) => (
nb,
// `0x10FFFF` is the maximum value for a `\u` escaped character.
0x0010_FFFF,
"invalid character in unicode escape",
"unicode escape must be at most 10FFFF",
),
Char::UnicodeEscape(nb) => {
match prefix {
Some(CharPrefix::Binary) => {
return cut_error!(
"cannot use unicode escape in byte string in byte literal",
start,
);
}
None => (
nb,
// `0x10FFFF` is the maximum value for a `\u` escaped character.
0x0010_FFFF,
"invalid character in unicode escape",
"unicode escape must be at most 10FFFF",
),
}
}
};
let Ok(nb) = u32::from_str_radix(nb, 16) else {
i.reset(&start);
return cut_error!(err1, *i);
return cut_error!(err1, start);
};
if nb > max_value {
i.reset(&start);
return cut_error!(err2, *i);
return cut_error!(err2, start);
}
Ok(CharLit {
prefix: b_prefix.map(|_| CharPrefix::Binary),
content: s,
})
Ok(CharLit { prefix, content })
}
/// Represents the different kinds of char declarations:

View File

@ -0,0 +1 @@
˙˙˙{{a!(b'e)}} ˙l˙e

View File

@ -32,5 +32,77 @@ struct Err7;
#[template(source = "{% let s = 'aaa' %}", ext = "html")]
struct Err8;
#[derive(Template)]
#[template(source = r#"{{ b'c }}"#, ext = "html")]
struct UnterminatedByteLiteral;
#[derive(Template)]
#[template(source = r#"{{ b'' }}"#, ext = "html")]
struct EmptyByteLiteral;
#[derive(Template)]
#[template(source = r#"{{ b'\u{}' }}"#, ext = "html")]
struct UnicodeEscapeInByteLiteralEmpty;
#[derive(Template)]
#[template(source = r#"{{ b'\u{0}' }}"#, ext = "html")]
struct UnicodeEscapeInByteLiteralMinAscii;
#[derive(Template)]
#[template(source = r#"{{ b'\u{42}' }}"#, ext = "html")]
struct UnicodeEscapeInByteLiteralRandomAscii;
#[derive(Template)]
#[template(source = r#"{{ b'\u{7f}' }}"#, ext = "html")]
struct UnicodeEscapeInByteLiteralMaxAscii;
#[derive(Template)]
#[template(source = r#"{{ b'\u{80}' }}"#, ext = "html")]
struct UnicodeEscapeInByteLiteralMinMultilingual;
#[derive(Template)]
#[template(source = r#"{{ b'\u{1234}' }}"#, ext = "html")]
struct UnicodeEscapeInByteLiteralRandomMultilingual;
#[derive(Template)]
#[template(source = r#"{{ b'\u{10ffff}' }}"#, ext = "html")]
struct UnicodeEscapeInByteLiteralMaxMultilingual;
#[derive(Template)]
#[template(source = r#"{{ a!(b'c) }}"#, ext = "html")]
struct UnterminatedByteLiteralInMacro;
#[derive(Template)]
#[template(source = r#"{{ b'' }}"#, ext = "html")]
struct EmptyByteLiteralInMacro; // NOTE(review): despite the `InMacro` name, the source has no `a!(...)` wrapper and is identical to `EmptyByteLiteral`; presumably `a!(b'')` was intended — confirm, and update the expected stderr if changed.
#[derive(Template)]
#[template(source = r#"{{ b'\u{}' }}"#, ext = "html")]
struct UnicodeEscapeInByteLiteralEmptyInMacro; // NOTE(review): despite the `InMacro` name, the source has no `a!(...)` wrapper and is identical to `UnicodeEscapeInByteLiteralEmpty`; presumably `a!(b'\u{}')` was intended — confirm, and update the expected stderr if changed.
#[derive(Template)]
#[template(source = r#"{{ a!(b'\u{0}') }}"#, ext = "html")]
struct UnicodeEscapeInByteLiteralMinAsciiInMacro;
#[derive(Template)]
#[template(source = r#"{{ a!(b'\u{42}') }}"#, ext = "html")]
struct UnicodeEscapeInByteLiteralRandomAsciiInMacro;
#[derive(Template)]
#[template(source = r#"{{ a!(b'\u{7f}') }}"#, ext = "html")]
struct UnicodeEscapeInByteLiteralMaxAsciiInMacro;
#[derive(Template)]
#[template(source = r#"{{ a!(b'\u{80}') }}"#, ext = "html")]
struct UnicodeEscapeInByteLiteralMinMultilingualInMacro;
#[derive(Template)]
#[template(source = r#"{{ a!(b'\u{1234}') }}"#, ext = "html")]
struct UnicodeEscapeInByteLiteralRandomMultilingualInMacro;
#[derive(Template)]
#[template(source = r#"{{ a!(b'\u{10ffff}') }}"#, ext = "html")]
struct UnicodeEscapeInByteLiteralMaxMultilingualInMacro;
fn main() {
}

View File

@ -61,3 +61,147 @@ error: invalid character
|
32 | #[template(source = "{% let s = 'aaa' %}", ext = "html")]
| ^^^^^^^^^^^^^^^^^^^^^
error: unterminated byte constant
--> <source attribute>:1:3
"b'c }}"
--> tests/ui/char_literal.rs:36:21
|
36 | #[template(source = r#"{{ b'c }}"#, ext = "html")]
| ^^^^^^^^^^^^^^
error: empty character literal
--> <source attribute>:1:3
"b'' }}"
--> tests/ui/char_literal.rs:40:21
|
40 | #[template(source = r#"{{ b'' }}"#, ext = "html")]
| ^^^^^^^^^^^^^^
error: invalid character
--> <source attribute>:1:3
"b'\\u{}' }}"
--> tests/ui/char_literal.rs:44:21
|
44 | #[template(source = r#"{{ b'\u{}' }}"#, ext = "html")]
| ^^^^^^^^^^^^^^^^^^
error: cannot use unicode escape in byte string in byte literal
--> <source attribute>:1:3
"b'\\u{0}' }}"
--> tests/ui/char_literal.rs:48:21
|
48 | #[template(source = r#"{{ b'\u{0}' }}"#, ext = "html")]
| ^^^^^^^^^^^^^^^^^^^
error: cannot use unicode escape in byte string in byte literal
--> <source attribute>:1:3
"b'\\u{42}' }}"
--> tests/ui/char_literal.rs:52:21
|
52 | #[template(source = r#"{{ b'\u{42}' }}"#, ext = "html")]
| ^^^^^^^^^^^^^^^^^^^^
error: cannot use unicode escape in byte string in byte literal
--> <source attribute>:1:3
"b'\\u{7f}' }}"
--> tests/ui/char_literal.rs:56:21
|
56 | #[template(source = r#"{{ b'\u{7f}' }}"#, ext = "html")]
| ^^^^^^^^^^^^^^^^^^^^
error: cannot use unicode escape in byte string in byte literal
--> <source attribute>:1:3
"b'\\u{80}' }}"
--> tests/ui/char_literal.rs:60:21
|
60 | #[template(source = r#"{{ b'\u{80}' }}"#, ext = "html")]
| ^^^^^^^^^^^^^^^^^^^^
error: cannot use unicode escape in byte string in byte literal
--> <source attribute>:1:3
"b'\\u{1234}' }}"
--> tests/ui/char_literal.rs:64:21
|
64 | #[template(source = r#"{{ b'\u{1234}' }}"#, ext = "html")]
| ^^^^^^^^^^^^^^^^^^^^^^
error: cannot use unicode escape in byte string in byte literal
--> <source attribute>:1:3
"b'\\u{10ffff}' }}"
--> tests/ui/char_literal.rs:68:21
|
68 | #[template(source = r#"{{ b'\u{10ffff}' }}"#, ext = "html")]
| ^^^^^^^^^^^^^^^^^^^^^^^^
error: unterminated byte constant
--> <source attribute>:1:6
"b'c) }}"
--> tests/ui/char_literal.rs:72:21
|
72 | #[template(source = r#"{{ a!(b'c) }}"#, ext = "html")]
| ^^^^^^^^^^^^^^^^^^
error: empty character literal
--> <source attribute>:1:3
"b'' }}"
--> tests/ui/char_literal.rs:76:21
|
76 | #[template(source = r#"{{ b'' }}"#, ext = "html")]
| ^^^^^^^^^^^^^^
error: invalid character
--> <source attribute>:1:3
"b'\\u{}' }}"
--> tests/ui/char_literal.rs:80:21
|
80 | #[template(source = r#"{{ b'\u{}' }}"#, ext = "html")]
| ^^^^^^^^^^^^^^^^^^
error: cannot use unicode escape in byte string in byte literal
--> <source attribute>:1:6
"b'\\u{0}') }}"
--> tests/ui/char_literal.rs:84:21
|
84 | #[template(source = r#"{{ a!(b'\u{0}') }}"#, ext = "html")]
| ^^^^^^^^^^^^^^^^^^^^^^^
error: cannot use unicode escape in byte string in byte literal
--> <source attribute>:1:6
"b'\\u{42}') }}"
--> tests/ui/char_literal.rs:88:21
|
88 | #[template(source = r#"{{ a!(b'\u{42}') }}"#, ext = "html")]
| ^^^^^^^^^^^^^^^^^^^^^^^^
error: cannot use unicode escape in byte string in byte literal
--> <source attribute>:1:6
"b'\\u{7f}') }}"
--> tests/ui/char_literal.rs:92:21
|
92 | #[template(source = r#"{{ a!(b'\u{7f}') }}"#, ext = "html")]
| ^^^^^^^^^^^^^^^^^^^^^^^^
error: cannot use unicode escape in byte string in byte literal
--> <source attribute>:1:6
"b'\\u{80}') }}"
--> tests/ui/char_literal.rs:96:21
|
96 | #[template(source = r#"{{ a!(b'\u{80}') }}"#, ext = "html")]
| ^^^^^^^^^^^^^^^^^^^^^^^^
error: cannot use unicode escape in byte string in byte literal
--> <source attribute>:1:6
"b'\\u{1234}') }}"
--> tests/ui/char_literal.rs:100:21
|
100 | #[template(source = r#"{{ a!(b'\u{1234}') }}"#, ext = "html")]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^
error: cannot use unicode escape in byte string in byte literal
--> <source attribute>:1:6
"b'\\u{10ffff}') }}"
--> tests/ui/char_literal.rs:104:21
|
104 | #[template(source = r#"{{ a!(b'\u{10ffff}') }}"#, ext = "html")]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^