mirror of
https://github.com/askama-rs/askama.git
synced 2025-09-30 06:21:13 +00:00
Replace from_utf8_unsafe()
with AsciiStr
This moves many unsafe blocks into one file.
This commit is contained in:
parent
52de203fb1
commit
84edf1cc77
@ -17,9 +17,13 @@ extend-exclude = [
|
||||
]
|
||||
|
||||
[default.extend-words]
|
||||
# It's actually called that in the ASCII standard
|
||||
Enquiry = "Enquiry"
|
||||
|
||||
# French words
|
||||
exemple = "exemple"
|
||||
existant = "existant"
|
||||
|
||||
# used in tests
|
||||
Ba = "Ba"
|
||||
fo = "fo"
|
||||
|
1
fuzzing/fuzz/src/ascii_str.rs
Symbolic link
1
fuzzing/fuzz/src/ascii_str.rs
Symbolic link
@ -0,0 +1 @@
|
||||
../../../rinja/src/ascii_str.rs
|
@ -3,6 +3,7 @@
|
||||
#![deny(unreachable_pub)]
|
||||
|
||||
pub mod all;
|
||||
mod ascii_str;
|
||||
pub mod filters;
|
||||
pub mod html;
|
||||
pub mod parser;
|
||||
|
131
rinja/src/ascii_str.rs
Normal file
131
rinja/src/ascii_str.rs
Normal file
@ -0,0 +1,131 @@
|
||||
// FIXME: Replace `AsciiChar` with [`core::ascii::Char`] once [#110998] is stable
|
||||
// [#110998]: https://github.com/rust-lang/rust/issues/110998
|
||||
|
||||
#![allow(unreachable_pub)]
|
||||
|
||||
use core::ops::{Deref, Index, IndexMut};
|
||||
|
||||
pub use _ascii_char::AsciiChar;
|
||||
|
||||
/// A string that only contains ASCII characters, same layout as [`str`].
|
||||
#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
||||
#[repr(transparent)]
|
||||
pub struct AsciiStr([AsciiChar]);
|
||||
|
||||
impl AsciiStr {
|
||||
#[inline]
|
||||
pub const fn new_sized<const N: usize>(src: &str) -> [AsciiChar; N] {
|
||||
if !src.is_ascii() || src.len() > N {
|
||||
panic!();
|
||||
}
|
||||
|
||||
let src = src.as_bytes();
|
||||
let mut result = [AsciiChar::NULL; N];
|
||||
let mut i = 0;
|
||||
while i < src.len() {
|
||||
result[i] = AsciiChar::new(src[i]);
|
||||
i += 1;
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub const fn from_slice(src: &[AsciiChar]) -> &Self {
|
||||
// SAFETY: `Self` is transparent over `[AsciiChar]`.
|
||||
unsafe { core::mem::transmute::<&[AsciiChar], &AsciiStr>(src) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub const fn as_str(&self) -> &str {
|
||||
// SAFETY: `Self` has the same layout as `str`,
|
||||
// and all ASCII characters are valid UTF-8 characters.
|
||||
unsafe { core::mem::transmute::<&AsciiStr, &str>(self) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub const fn len(&self) -> usize {
|
||||
self.0.len()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub const fn is_empty(&self) -> bool {
|
||||
self.0.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
// Must not implement `DerefMut`. Not every `char` is an ASCII character.
|
||||
impl Deref for AsciiStr {
|
||||
type Target = str;
|
||||
|
||||
#[inline]
|
||||
fn deref(&self) -> &Self::Target {
|
||||
self.as_str()
|
||||
}
|
||||
}
|
||||
|
||||
impl<Idx> Index<Idx> for AsciiStr
|
||||
where
|
||||
[AsciiChar]: Index<Idx, Output = [AsciiChar]>,
|
||||
{
|
||||
type Output = [AsciiChar];
|
||||
|
||||
#[inline]
|
||||
fn index(&self, index: Idx) -> &Self::Output {
|
||||
&self.0[index]
|
||||
}
|
||||
}
|
||||
|
||||
impl<Idx> IndexMut<Idx> for AsciiStr
|
||||
where
|
||||
[AsciiChar]: IndexMut<Idx, Output = [AsciiChar]>,
|
||||
{
|
||||
#[inline]
|
||||
fn index_mut(&mut self, index: Idx) -> &mut Self::Output {
|
||||
&mut self.0[index]
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for &'static AsciiStr {
|
||||
#[inline]
|
||||
fn default() -> Self {
|
||||
// SAFETY: `Self` has the same layout as `str`.
|
||||
unsafe { core::mem::transmute::<&str, &AsciiStr>("") }
|
||||
}
|
||||
}
|
||||
|
||||
impl AsciiChar {
|
||||
pub const NULL: AsciiChar = AsciiChar::new(0);
|
||||
|
||||
#[inline]
|
||||
pub const fn slice_as_bytes<const N: usize>(src: &[AsciiChar; N]) -> &[u8; N] {
|
||||
// SAFETY: `[AsciiChar]` has the same layout as `[u8]`.
|
||||
unsafe { core::mem::transmute::<&[AsciiChar; N], &[u8; N]>(src) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub const fn two_digits(d: u32) -> [Self; 2] {
|
||||
const ALPHABET: &[u8; 10] = b"0123456789";
|
||||
|
||||
if d >= ALPHABET.len().pow(2) as u32 {
|
||||
panic!();
|
||||
}
|
||||
[
|
||||
Self::new(ALPHABET[d as usize / ALPHABET.len()]),
|
||||
Self::new(ALPHABET[d as usize % ALPHABET.len()]),
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
// The type lives in its own private module, so that its field is inaccessible to the rest of
// the file: the checked constructor `AsciiChar::new()` is the only way to create a value from
// a specific byte.
mod _ascii_char {
    /// A character that is known to be in ASCII range, same layout as [`u8`].
    #[derive(Debug, Clone, Copy, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
    #[repr(transparent)]
    pub struct AsciiChar(u8);

    impl AsciiChar {
        /// Wraps the byte `c`.
        ///
        /// # Panics
        ///
        /// Panics if `c` is not in ASCII range. When the function is evaluated in a const
        /// context, the panic is a compile-time error.
        #[inline]
        pub const fn new(c: u8) -> Self {
            if c.is_ascii() {
                Self(c)
            } else {
                panic!("AsciiChar::new: byte is not an ASCII character")
            }
        }
    }
}
|
@ -1,9 +1,9 @@
|
||||
use core::convert::Infallible;
|
||||
use core::fmt;
|
||||
use core::mem::MaybeUninit;
|
||||
use core::str::from_utf8_unchecked;
|
||||
|
||||
use super::FastWritable;
|
||||
use crate::ascii_str::{AsciiChar, AsciiStr};
|
||||
|
||||
/// Returns adequate string representation (in KB, ..) of number of bytes
|
||||
///
|
||||
@ -58,26 +58,27 @@ impl FastWritable for FilesizeFormatFilter {
|
||||
}
|
||||
|
||||
/// Formats `buffer` to contain the decimal point, decimal places and unit
|
||||
fn format_frac(buffer: &mut MaybeUninit<[u8; 8]>, prefix: u8, scaled: u32) -> &str {
|
||||
// LLVM generates better byte code for register sized buffers, so we add some NULs
|
||||
let buffer = buffer.write(*b"..0 kB\0\0");
|
||||
fn format_frac(buffer: &mut MaybeUninit<[AsciiChar; 8]>, prefix: AsciiChar, scaled: u32) -> &str {
|
||||
// LLVM generates better byte code for register sized buffers
|
||||
let buffer = buffer.write(AsciiStr::new_sized("..0 kB"));
|
||||
buffer[4] = prefix;
|
||||
|
||||
let frac = scaled % 100;
|
||||
let buffer = if frac == 0 {
|
||||
&buffer[3..6]
|
||||
} else if frac % 10 == 0 {
|
||||
// the decimal separator '.' is already contained in buffer[1]
|
||||
buffer[2] = b'0' + (frac / 10) as u8;
|
||||
&buffer[1..6]
|
||||
} else {
|
||||
// the decimal separator '.' is already contained in buffer[0]
|
||||
buffer[1] = b'0' + (frac / 10) as u8;
|
||||
buffer[2] = b'0' + (frac % 10) as u8;
|
||||
&buffer[0..6]
|
||||
let digits = AsciiChar::two_digits(frac);
|
||||
if digits[1] == AsciiChar::new(b'0') {
|
||||
// the decimal separator '.' is already contained in buffer[1]
|
||||
buffer[2] = digits[0];
|
||||
&buffer[1..6]
|
||||
} else {
|
||||
// the decimal separator '.' is already contained in buffer[0]
|
||||
[buffer[1], buffer[2]] = digits;
|
||||
&buffer[0..6]
|
||||
}
|
||||
};
|
||||
// SAFETY: we know that the buffer contains only ASCII data
|
||||
unsafe { from_utf8_unchecked(buffer) }
|
||||
AsciiStr::from_slice(buffer).as_str()
|
||||
}
|
||||
|
||||
#[cold]
|
||||
@ -87,17 +88,17 @@ fn too_big<W: fmt::Write + ?Sized>(value: f32, dest: &mut W) -> crate::Result<()
|
||||
}
|
||||
|
||||
/// `((si_prefix, factor), limit)`, the factor is offset by 10**2 to account for 2 decimal places
|
||||
const SI_PREFIXES: &[((u8, f32), f32)] = &[
|
||||
((b'k', 1e-1), 1e6),
|
||||
((b'M', 1e-4), 1e9),
|
||||
((b'G', 1e-7), 1e12),
|
||||
((b'T', 1e-10), 1e15),
|
||||
((b'P', 1e-13), 1e18),
|
||||
((b'E', 1e-16), 1e21),
|
||||
((b'Z', 1e-19), 1e24),
|
||||
((b'Y', 1e-22), 1e27),
|
||||
((b'R', 1e-25), 1e30),
|
||||
((b'Q', 1e-28), 1e33),
|
||||
const SI_PREFIXES: &[((AsciiChar, f32), f32)] = &[
|
||||
((AsciiChar::new(b'k'), 1e-1), 1e6),
|
||||
((AsciiChar::new(b'M'), 1e-4), 1e9),
|
||||
((AsciiChar::new(b'G'), 1e-7), 1e12),
|
||||
((AsciiChar::new(b'T'), 1e-10), 1e15),
|
||||
((AsciiChar::new(b'P'), 1e-13), 1e18),
|
||||
((AsciiChar::new(b'E'), 1e-16), 1e21),
|
||||
((AsciiChar::new(b'Z'), 1e-19), 1e24),
|
||||
((AsciiChar::new(b'Y'), 1e-22), 1e27),
|
||||
((AsciiChar::new(b'R'), 1e-25), 1e30),
|
||||
((AsciiChar::new(b'Q'), 1e-28), 1e33),
|
||||
];
|
||||
|
||||
#[test]
|
||||
|
@ -5,6 +5,8 @@
|
||||
|
||||
use core::{fmt, str};
|
||||
|
||||
use crate::ascii_str::{AsciiChar, AsciiStr};
|
||||
|
||||
#[allow(unused)]
|
||||
pub(crate) fn write_escaped_str(mut dest: impl fmt::Write, src: &str) -> fmt::Result {
|
||||
// This implementation reads one byte after another.
|
||||
@ -17,8 +19,7 @@ pub(crate) fn write_escaped_str(mut dest: impl fmt::Write, src: &str) -> fmt::Re
|
||||
if let Some(escaped) = get_escaped(byte) {
|
||||
[escaped_buf[2], escaped_buf[3]] = escaped;
|
||||
write_str_if_nonempty(&mut dest, &src[last..index])?;
|
||||
// SAFETY: the content of `escaped_buf` is pure ASCII
|
||||
dest.write_str(unsafe { str::from_utf8_unchecked(&escaped_buf[..ESCAPED_BUF_LEN]) })?;
|
||||
dest.write_str(AsciiStr::from_slice(&escaped_buf[..ESCAPED_BUF_LEN]))?;
|
||||
last = index + 1;
|
||||
}
|
||||
}
|
||||
@ -32,8 +33,7 @@ pub(crate) fn write_escaped_char(mut dest: impl fmt::Write, c: char) -> fmt::Res
|
||||
} else if let Some(escaped) = get_escaped(c as u8) {
|
||||
let mut escaped_buf = ESCAPED_BUF_INIT;
|
||||
[escaped_buf[2], escaped_buf[3]] = escaped;
|
||||
// SAFETY: the content of `escaped_buf` is pure ASCII
|
||||
dest.write_str(unsafe { str::from_utf8_unchecked(&escaped_buf[..ESCAPED_BUF_LEN]) })
|
||||
dest.write_str(AsciiStr::from_slice(&escaped_buf[..ESCAPED_BUF_LEN]))
|
||||
} else {
|
||||
// RATIONALE: `write_char(c)` gets optimized if it is known that `c.is_ascii()`
|
||||
dest.write_char(c)
|
||||
@ -42,10 +42,12 @@ pub(crate) fn write_escaped_char(mut dest: impl fmt::Write, c: char) -> fmt::Res
|
||||
|
||||
/// Returns the decimal representation of the codepoint if the character needs HTML escaping.
|
||||
#[inline]
|
||||
fn get_escaped(byte: u8) -> Option<[u8; 2]> {
|
||||
match byte {
|
||||
MIN_CHAR..=MAX_CHAR => Some(TABLE.0[(byte - MIN_CHAR) as usize]?.to_bytes()),
|
||||
_ => None,
|
||||
fn get_escaped(byte: u8) -> Option<[AsciiChar; 2]> {
|
||||
if let MIN_CHAR..=MAX_CHAR = byte {
|
||||
let entry = TABLE.0[(byte - MIN_CHAR) as usize];
|
||||
(entry != UNESCAPED).then_some(entry)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
@ -91,63 +93,27 @@ const MAX_CHAR: u8 = {
|
||||
const CHAR_RANGE: usize = (MAX_CHAR - MIN_CHAR + 1) as usize;
|
||||
|
||||
#[repr(align(64))]
|
||||
struct Table([Option<Digits>; CHAR_RANGE]);
|
||||
struct Table([[AsciiChar; 2]; CHAR_RANGE]);
|
||||
|
||||
/// For characters that need HTML escaping, the codepoint is formatted as decimal digits,
|
||||
/// otherwise `b"\0\0"`. Starting at [`MIN_CHAR`].
|
||||
const TABLE: &Table = &{
|
||||
let mut table = Table([None; CHAR_RANGE]);
|
||||
let mut table = Table([UNESCAPED; CHAR_RANGE]);
|
||||
let mut i = 0;
|
||||
while i < CHARS.len() {
|
||||
let c = CHARS[i];
|
||||
table.0[(c - MIN_CHAR) as usize] = Some(Digits::new(c));
|
||||
table.0[c as u32 as usize - MIN_CHAR as usize] = AsciiChar::two_digits(c as u32);
|
||||
i += 1;
|
||||
}
|
||||
table
|
||||
};
|
||||
|
||||
const UNESCAPED: [AsciiChar; 2] = AsciiStr::new_sized("");
|
||||
|
||||
const ESCAPED_BUF_INIT_UNPADDED: &str = "&#__;";
|
||||
// RATIONALE: llvm generates better code if the buffer is register sized
|
||||
const ESCAPED_BUF_INIT: [u8; 8] = *b"&#__;\0\0\0";
|
||||
const ESCAPED_BUF_LEN: usize = b"&#__;".len();
|
||||
|
||||
/// All possible decimal representations of codepoints that need escaping in HTML / XML.
|
||||
///
|
||||
/// Using this type instead of e.g. `Option<NonZeroU16>` allows rustc to select any and all of the
|
||||
/// 65,531 unused representations of this type as niche, which can help speed up the
|
||||
/// generated byte code. If not all needed representations were present, then the `TABLE` definition
|
||||
/// would fail to compile.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
#[repr(u16)]
|
||||
enum Digits {
|
||||
/// `'"'`
|
||||
V34 = u16::from_ne_bytes(*b"34"),
|
||||
/// `'&'`
|
||||
V38 = u16::from_ne_bytes(*b"38"),
|
||||
/// `'\''`
|
||||
V39 = u16::from_ne_bytes(*b"39"),
|
||||
/// `'<'`
|
||||
V60 = u16::from_ne_bytes(*b"60"),
|
||||
/// `'>'`
|
||||
V62 = u16::from_ne_bytes(*b"62"),
|
||||
}
|
||||
|
||||
impl Digits {
|
||||
#[inline]
|
||||
const fn to_bytes(self) -> [u8; 2] {
|
||||
(self as u16).to_ne_bytes()
|
||||
}
|
||||
|
||||
const fn new(v: u8) -> Self {
|
||||
match v {
|
||||
34 => Self::V34,
|
||||
38 => Self::V38,
|
||||
39 => Self::V39,
|
||||
60 => Self::V60,
|
||||
62 => Self::V62,
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
const ESCAPED_BUF_INIT: [AsciiChar; 8] = AsciiStr::new_sized(ESCAPED_BUF_INIT_UNPADDED);
|
||||
const ESCAPED_BUF_LEN: usize = ESCAPED_BUF_INIT_UNPADDED.len();
|
||||
|
||||
#[test]
|
||||
#[cfg(feature = "alloc")]
|
||||
|
@ -65,6 +65,7 @@ extern crate alloc;
|
||||
#[cfg(feature = "std")]
|
||||
extern crate std;
|
||||
|
||||
mod ascii_str;
|
||||
mod error;
|
||||
pub mod filters;
|
||||
#[doc(hidden)]
|
||||
|
@ -14,6 +14,7 @@ use parser::{
|
||||
};
|
||||
use rustc_hash::FxBuildHasher;
|
||||
|
||||
use crate::ascii_str::{AsciiChar, AsciiStr};
|
||||
use crate::heritage::{Context, Heritage};
|
||||
use crate::html::write_escaped_str;
|
||||
use crate::input::{Source, TemplateInput};
|
||||
@ -648,18 +649,14 @@ fn normalize_identifier(ident: &str) -> &str {
|
||||
}
|
||||
let kws = RUST_KEYWORDS[ident.len()];
|
||||
|
||||
let mut padded_ident = [b'_'; MAX_RUST_KEYWORD_LEN];
|
||||
let mut padded_ident = [0; MAX_RUST_KEYWORD_LEN];
|
||||
padded_ident[..ident.len()].copy_from_slice(ident.as_bytes());
|
||||
|
||||
// Since the individual buckets are quite short, a linear search is faster than a binary search.
|
||||
let replacement = match kws
|
||||
.iter()
|
||||
.find(|probe| padded_ident == <[u8; MAX_RUST_KEYWORD_LEN]>::try_from(&probe[2..]).unwrap())
|
||||
{
|
||||
Some(replacement) => replacement,
|
||||
None => return ident,
|
||||
};
|
||||
|
||||
// SAFETY: We know that the input byte slice is pure-ASCII.
|
||||
unsafe { std::str::from_utf8_unchecked(&replacement[..ident.len() + 2]) }
|
||||
for probe in kws {
|
||||
if padded_ident == *AsciiChar::slice_as_bytes(probe[2..].try_into().unwrap()) {
|
||||
return AsciiStr::from_slice(&probe[..ident.len() + 2]);
|
||||
}
|
||||
}
|
||||
ident
|
||||
}
|
||||
|
@ -18,12 +18,7 @@ use std::hash::{BuildHasher, Hash};
|
||||
use std::path::Path;
|
||||
use std::sync::Mutex;
|
||||
|
||||
use config::{Config, read_config_file};
|
||||
use generator::{TmplKind, template_to_string};
|
||||
use heritage::{Context, Heritage};
|
||||
use input::{AnyTemplateArgs, Print, TemplateArgs, TemplateInput};
|
||||
use integration::{Buffer, build_template_enum};
|
||||
use parser::{Parsed, strip_common};
|
||||
use parser::{Parsed, ascii_str, strip_common};
|
||||
#[cfg(not(feature = "__standalone"))]
|
||||
use proc_macro::TokenStream as TokenStream12;
|
||||
#[cfg(feature = "__standalone")]
|
||||
@ -32,6 +27,12 @@ use proc_macro2::{Delimiter, Group, Span, TokenStream, TokenTree};
|
||||
use quote::{quote, quote_spanned};
|
||||
use rustc_hash::FxBuildHasher;
|
||||
|
||||
use crate::config::{Config, read_config_file};
|
||||
use crate::generator::{TmplKind, template_to_string};
|
||||
use crate::heritage::{Context, Heritage};
|
||||
use crate::input::{AnyTemplateArgs, Print, TemplateArgs, TemplateInput};
|
||||
use crate::integration::{Buffer, build_template_enum};
|
||||
|
||||
/// The `Template` derive macro and its `template()` attribute.
|
||||
///
|
||||
/// Rinja works by generating one or more trait implementations for any
|
||||
|
1
rinja_parser/src/ascii_str.rs
Symbolic link
1
rinja_parser/src/ascii_str.rs
Symbolic link
@ -0,0 +1 @@
|
||||
../../rinja/src/ascii_str.rs
|
@ -2,6 +2,14 @@
|
||||
#![deny(elided_lifetimes_in_paths)]
|
||||
#![deny(unreachable_pub)]
|
||||
|
||||
pub mod ascii_str;
|
||||
pub mod expr;
|
||||
mod memchr_splitter;
|
||||
pub mod node;
|
||||
mod target;
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::cell::Cell;
|
||||
use std::env::current_dir;
|
||||
@ -17,16 +25,10 @@ use winnow::stream::{AsChar, Stream as _};
|
||||
use winnow::token::{any, one_of, take_till, take_while};
|
||||
use winnow::{ModalParser, Parser};
|
||||
|
||||
pub mod expr;
|
||||
pub use expr::{Attr, Expr, Filter, TyGenerics};
|
||||
mod memchr_splitter;
|
||||
pub mod node;
|
||||
pub use node::Node;
|
||||
|
||||
mod target;
|
||||
pub use target::Target;
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
use crate::ascii_str::{AsciiChar, AsciiStr};
|
||||
pub use crate::expr::{Attr, Expr, Filter, TyGenerics};
|
||||
pub use crate::node::Node;
|
||||
pub use crate::target::Target;
|
||||
|
||||
mod _parsed {
|
||||
use std::path::Path;
|
||||
@ -1167,105 +1169,133 @@ const PRIMITIVE_TYPES: &[&str] = &{
|
||||
list
|
||||
};
|
||||
|
||||
#[doc(hidden)]
|
||||
pub const MAX_RUST_KEYWORD_LEN: usize = 8;
|
||||
const MAX_RUST_RAW_KEYWORD_LEN: usize = MAX_RUST_KEYWORD_LEN + 2;
|
||||
#[doc(hidden)]
|
||||
pub const RUST_KEYWORDS: &[&[[u8; MAX_RUST_RAW_KEYWORD_LEN]]] = {
|
||||
// FIXME: Replace `u8` with `[core::ascii::Char; MAX_REPL_LEN]` once
|
||||
// <https://github.com/rust-lang/rust/issues/110998> is stable.
|
||||
pub const MAX_RUST_RAW_KEYWORD_LEN: usize = MAX_RUST_KEYWORD_LEN + 2;
|
||||
|
||||
const KW2: &[[u8; MAX_RUST_RAW_KEYWORD_LEN]] = &[
|
||||
*b"r#as______",
|
||||
*b"r#do______",
|
||||
*b"r#fn______",
|
||||
*b"r#if______",
|
||||
*b"r#in______",
|
||||
pub const RUST_KEYWORDS: &[&[[AsciiChar; MAX_RUST_RAW_KEYWORD_LEN]]; MAX_RUST_KEYWORD_LEN + 1] = &{
|
||||
const NO_KWS: &[[AsciiChar; MAX_RUST_RAW_KEYWORD_LEN]] = &[];
|
||||
const KW2: &[[AsciiChar; MAX_RUST_RAW_KEYWORD_LEN]] = &[
|
||||
AsciiStr::new_sized("r#as"),
|
||||
AsciiStr::new_sized("r#do"),
|
||||
AsciiStr::new_sized("r#fn"),
|
||||
AsciiStr::new_sized("r#if"),
|
||||
AsciiStr::new_sized("r#in"),
|
||||
];
|
||||
const KW3: &[[u8; MAX_RUST_RAW_KEYWORD_LEN]] = &[
|
||||
*b"r#box_____",
|
||||
*b"r#dyn_____",
|
||||
*b"r#for_____",
|
||||
*b"r#gen_____",
|
||||
*b"r#let_____",
|
||||
*b"r#mod_____",
|
||||
*b"r#mut_____",
|
||||
*b"r#pub_____",
|
||||
*b"r#ref_____",
|
||||
*b"r#try_____",
|
||||
*b"r#use_____",
|
||||
const KW3: &[[AsciiChar; MAX_RUST_RAW_KEYWORD_LEN]] = &[
|
||||
AsciiStr::new_sized("r#box"),
|
||||
AsciiStr::new_sized("r#dyn"),
|
||||
AsciiStr::new_sized("r#for"),
|
||||
AsciiStr::new_sized("r#gen"),
|
||||
AsciiStr::new_sized("r#let"),
|
||||
AsciiStr::new_sized("r#mod"),
|
||||
AsciiStr::new_sized("r#mut"),
|
||||
AsciiStr::new_sized("r#pub"),
|
||||
AsciiStr::new_sized("r#ref"),
|
||||
AsciiStr::new_sized("r#try"),
|
||||
AsciiStr::new_sized("r#use"),
|
||||
];
|
||||
const KW4: &[[u8; MAX_RUST_RAW_KEYWORD_LEN]] = &[
|
||||
*b"r#else____",
|
||||
*b"r#enum____",
|
||||
*b"r#impl____",
|
||||
*b"r#move____",
|
||||
*b"r#priv____",
|
||||
*b"r#true____",
|
||||
*b"r#type____",
|
||||
const KW4: &[[AsciiChar; MAX_RUST_RAW_KEYWORD_LEN]] = &[
|
||||
AsciiStr::new_sized("r#else"),
|
||||
AsciiStr::new_sized("r#enum"),
|
||||
AsciiStr::new_sized("r#impl"),
|
||||
AsciiStr::new_sized("r#move"),
|
||||
AsciiStr::new_sized("r#priv"),
|
||||
AsciiStr::new_sized("r#true"),
|
||||
AsciiStr::new_sized("r#type"),
|
||||
];
|
||||
const KW5: &[[u8; MAX_RUST_RAW_KEYWORD_LEN]] = &[
|
||||
*b"r#async___",
|
||||
*b"r#await___",
|
||||
*b"r#break___",
|
||||
*b"r#const___",
|
||||
*b"r#crate___",
|
||||
*b"r#false___",
|
||||
*b"r#final___",
|
||||
*b"r#macro___",
|
||||
*b"r#match___",
|
||||
*b"r#trait___",
|
||||
*b"r#where___",
|
||||
*b"r#while___",
|
||||
*b"r#yield___",
|
||||
const KW5: &[[AsciiChar; MAX_RUST_RAW_KEYWORD_LEN]] = &[
|
||||
AsciiStr::new_sized("r#async"),
|
||||
AsciiStr::new_sized("r#await"),
|
||||
AsciiStr::new_sized("r#break"),
|
||||
AsciiStr::new_sized("r#const"),
|
||||
AsciiStr::new_sized("r#crate"),
|
||||
AsciiStr::new_sized("r#false"),
|
||||
AsciiStr::new_sized("r#final"),
|
||||
AsciiStr::new_sized("r#macro"),
|
||||
AsciiStr::new_sized("r#match"),
|
||||
AsciiStr::new_sized("r#trait"),
|
||||
AsciiStr::new_sized("r#where"),
|
||||
AsciiStr::new_sized("r#while"),
|
||||
AsciiStr::new_sized("r#yield"),
|
||||
];
|
||||
const KW6: &[[u8; MAX_RUST_RAW_KEYWORD_LEN]] = &[
|
||||
*b"r#become__",
|
||||
*b"r#extern__",
|
||||
*b"r#return__",
|
||||
*b"r#static__",
|
||||
*b"r#struct__",
|
||||
*b"r#typeof__",
|
||||
*b"r#unsafe__",
|
||||
const KW6: &[[AsciiChar; MAX_RUST_RAW_KEYWORD_LEN]] = &[
|
||||
AsciiStr::new_sized("r#become"),
|
||||
AsciiStr::new_sized("r#extern"),
|
||||
AsciiStr::new_sized("r#return"),
|
||||
AsciiStr::new_sized("r#static"),
|
||||
AsciiStr::new_sized("r#struct"),
|
||||
AsciiStr::new_sized("r#typeof"),
|
||||
AsciiStr::new_sized("r#unsafe"),
|
||||
];
|
||||
const KW7: &[[AsciiChar; MAX_RUST_RAW_KEYWORD_LEN]] = &[
|
||||
AsciiStr::new_sized("r#unsized"),
|
||||
AsciiStr::new_sized("r#virtual"),
|
||||
];
|
||||
const KW8: &[[AsciiChar; MAX_RUST_RAW_KEYWORD_LEN]] = &[
|
||||
AsciiStr::new_sized("r#abstract"),
|
||||
AsciiStr::new_sized("r#continue"),
|
||||
AsciiStr::new_sized("r#override"),
|
||||
];
|
||||
const KW7: &[[u8; MAX_RUST_RAW_KEYWORD_LEN]] = &[*b"r#unsized_", *b"r#virtual_"];
|
||||
const KW8: &[[u8; MAX_RUST_RAW_KEYWORD_LEN]] =
|
||||
&[*b"r#abstract", *b"r#continue", *b"r#override"];
|
||||
|
||||
&[&[], &[], KW2, KW3, KW4, KW5, KW6, KW7, KW8]
|
||||
[NO_KWS, NO_KWS, KW2, KW3, KW4, KW5, KW6, KW7, KW8]
|
||||
};
|
||||
|
||||
// These ones are only used in the parser, hence why they're private.
|
||||
const KWS_EXTRA: &[&[[u8; MAX_RUST_RAW_KEYWORD_LEN]]] = {
|
||||
const KW4: &[[u8; MAX_RUST_RAW_KEYWORD_LEN]] =
|
||||
&[*b"r#loop____", *b"r#self____", *b"r#Self____"];
|
||||
const KW5: &[[u8; MAX_RUST_RAW_KEYWORD_LEN]] = &[*b"r#super___", *b"r#union___"];
|
||||
const KWS_PARSER: &[&[[AsciiChar; MAX_RUST_RAW_KEYWORD_LEN]]; MAX_RUST_KEYWORD_LEN + 1] = &{
|
||||
const KW4: &[[AsciiChar; MAX_RUST_RAW_KEYWORD_LEN]] = &{
|
||||
let mut result = [AsciiStr::new_sized("r#"); RUST_KEYWORDS[4].len() + 3];
|
||||
let mut i = 0;
|
||||
while i < RUST_KEYWORDS[4].len() {
|
||||
result[i] = RUST_KEYWORDS[4][i];
|
||||
i += 1;
|
||||
}
|
||||
result[result.len() - 3] = AsciiStr::new_sized("r#loop");
|
||||
result[result.len() - 2] = AsciiStr::new_sized("r#self");
|
||||
result[result.len() - 1] = AsciiStr::new_sized("r#Self");
|
||||
result
|
||||
};
|
||||
const KW5: &[[AsciiChar; MAX_RUST_RAW_KEYWORD_LEN]] = &{
|
||||
let mut result = [AsciiStr::new_sized("r#"); RUST_KEYWORDS[5].len() + 2];
|
||||
let mut i = 0;
|
||||
while i < RUST_KEYWORDS[5].len() {
|
||||
result[i] = RUST_KEYWORDS[5][i];
|
||||
i += 1;
|
||||
}
|
||||
result[result.len() - 2] = AsciiStr::new_sized("r#super");
|
||||
result[result.len() - 1] = AsciiStr::new_sized("r#union");
|
||||
result
|
||||
};
|
||||
|
||||
&[&[], &[], &[], &[], KW4, KW5, &[], &[], &[]]
|
||||
[
|
||||
RUST_KEYWORDS[0],
|
||||
RUST_KEYWORDS[1],
|
||||
RUST_KEYWORDS[2],
|
||||
RUST_KEYWORDS[3],
|
||||
KW4,
|
||||
KW5,
|
||||
RUST_KEYWORDS[6],
|
||||
RUST_KEYWORDS[7],
|
||||
RUST_KEYWORDS[8],
|
||||
]
|
||||
};
|
||||
|
||||
fn is_rust_keyword(ident: &str) -> bool {
|
||||
fn is_rust_keyword_inner(
|
||||
kws: &[&[[u8; MAX_RUST_RAW_KEYWORD_LEN]]],
|
||||
padded_ident: &[u8; MAX_RUST_KEYWORD_LEN],
|
||||
ident_len: usize,
|
||||
) -> bool {
|
||||
// Since the individual buckets are quite short, a linear search is faster than a binary search.
|
||||
kws[ident_len]
|
||||
.iter()
|
||||
.any(|&probe| padded_ident == &probe[2..])
|
||||
}
|
||||
|
||||
let ident_len = ident.len();
|
||||
if ident_len > MAX_RUST_KEYWORD_LEN {
|
||||
return false;
|
||||
}
|
||||
let kws = KWS_PARSER[ident.len()];
|
||||
|
||||
let mut padded_ident = [b'_'; MAX_RUST_KEYWORD_LEN];
|
||||
padded_ident[..ident.len()].copy_from_slice(ident.as_bytes());
|
||||
let mut padded_ident = [0; MAX_RUST_KEYWORD_LEN];
|
||||
padded_ident[..ident_len].copy_from_slice(ident.as_bytes());
|
||||
|
||||
is_rust_keyword_inner(RUST_KEYWORDS, &padded_ident, ident_len)
|
||||
|| is_rust_keyword_inner(KWS_EXTRA, &padded_ident, ident_len)
|
||||
// Since the individual buckets are quite short, a linear search is faster than a binary search.
|
||||
for probe in kws {
|
||||
if padded_ident == *AsciiChar::slice_as_bytes(probe[2..].try_into().unwrap()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
#[cfg(not(windows))]
|
||||
@ -1275,15 +1305,6 @@ mod test {
|
||||
|
||||
use super::*;
|
||||
|
||||
#[track_caller]
|
||||
fn ensure_utf8_inner(entry: &[&[[u8; MAX_RUST_RAW_KEYWORD_LEN]]]) {
|
||||
for kws in entry {
|
||||
for kw in *kws {
|
||||
assert!(std::str::from_utf8(kw).is_ok(), "not UTF-8: {kw:?}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_strip_common() {
|
||||
// Full path is returned instead of empty when the entire path is in common.
|
||||
@ -1470,14 +1491,6 @@ mod test {
|
||||
);
|
||||
}
|
||||
|
||||
// Ensure that all raw keyword strings are UTF-8, because we use `from_utf8_unchecked()`.
|
||||
#[test]
|
||||
fn ensure_utf8() {
|
||||
assert_eq!(RUST_KEYWORDS.len(), KWS_EXTRA.len());
|
||||
ensure_utf8_inner(RUST_KEYWORDS);
|
||||
ensure_utf8_inner(KWS_EXTRA);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_rust_keyword() {
|
||||
assert!(is_rust_keyword("super"));
|
||||
|
Loading…
x
Reference in New Issue
Block a user