mirror of
https://github.com/uuid-rs/uuid.git
synced 2025-09-28 21:42:19 +00:00
Merge pull request #563 from KodrAus/chore/parser-docs
Add some inline comments to the new parser impl
This commit is contained in:
commit
f362d7d4f0
@ -51,9 +51,11 @@ impl<'a> InvalidUuid<'a> {
|
||||
|
||||
let mut hyphen_count = 0;
|
||||
let mut group_bounds = [0; 4];
|
||||
|
||||
// SAFETY: the byte array came from a valid utf8 string,
|
||||
// and is aligned along char boundaries.
|
||||
let string = unsafe { std::str::from_utf8_unchecked(s) };
|
||||
|
||||
for (index, character) in string.char_indices() {
|
||||
let byte = character as u8;
|
||||
if character as u32 - byte as u32 > 0 {
|
||||
|
@ -14,15 +14,22 @@ use crate::error::InvalidUuid;
|
||||
#[inline]
|
||||
pub const fn try_parse(input: &str) -> Result<[u8; 16], InvalidUuid> {
|
||||
let result = match (input.len(), input.as_bytes()) {
|
||||
// Inputs of 32 bytes must be a non-hyphenated UUID
|
||||
(32, s) => parse_simple(s),
|
||||
// Hyphenated UUIDs may be wrapped in various ways:
|
||||
// - `{UUID}` for braced UUIDs
|
||||
// - `urn:uuid:UUID` for URNs
|
||||
// - `UUID` for a regular hyphenated UUID
|
||||
(36, s)
|
||||
| (38, [b'{', s @ .., b'}'])
|
||||
| (
|
||||
45,
|
||||
[b'u', b'r', b'n', b':', b'u', b'u', b'i', b'd', b':', s @ ..],
|
||||
) => parse_hyphenated(s),
|
||||
// Any other shaped input is immediately invalid
|
||||
_ => Err(()),
|
||||
};
|
||||
|
||||
match result {
|
||||
Ok(b) => Ok(b),
|
||||
Err(()) => Err(InvalidUuid(input)),
|
||||
@ -31,30 +38,54 @@ pub const fn try_parse(input: &str) -> Result<[u8; 16], InvalidUuid> {
|
||||
|
||||
#[inline]
|
||||
const fn parse_simple(s: &[u8]) -> Result<[u8; 16], ()> {
|
||||
// This length check here removes all other bounds
|
||||
// checks in this function
|
||||
if s.len() != 32 {
|
||||
return Err(());
|
||||
}
|
||||
|
||||
let mut buf: [u8; 16] = [0; 16];
|
||||
let mut i = 0;
|
||||
|
||||
while i < 16 {
|
||||
// Convert a two-char hex value (like `A8`)
|
||||
// into a byte (like `10101000`)
|
||||
let h1 = HEX_TABLE[s[i * 2] as usize];
|
||||
let h2 = HEX_TABLE[s[i * 2 + 1] as usize];
|
||||
|
||||
// We use `0xff` as a sentinel value to indicate
|
||||
// an invalid hex character sequence (like the letter `G`)
|
||||
if h1 | h2 == 0xff {
|
||||
return Err(());
|
||||
}
|
||||
|
||||
// The upper nibble needs to be shifted into position
|
||||
// to produce the final byte value
|
||||
buf[i] = SHL4_TABLE[h1 as usize] | h2;
|
||||
i += 1;
|
||||
}
|
||||
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
const fn parse_hyphenated(s: &[u8]) -> Result<[u8; 16], ()> {
|
||||
// This length check here removes all other bounds
|
||||
// checks in this function
|
||||
if s.len() != 36 {
|
||||
return Err(());
|
||||
}
|
||||
|
||||
// We look at two hex-encoded values (4 chars) at a time because
|
||||
// that's the size of the smallest group in a hyphenated UUID.
|
||||
// The indexes we're interested in are:
|
||||
//
|
||||
// uuid : 936da01f-9abd-4d9d-80c7-02af85c822a8
|
||||
// | | || || || || | |
|
||||
// hyphens : | | 8| 13| 18| 23| | |
|
||||
// positions: 0 4 9 14 19 24 28 32
|
||||
|
||||
// First, ensure the hyphens appear in the right places
|
||||
match [s[8], s[13], s[18], s[23]] {
|
||||
[b'-', b'-', b'-', b'-'] => {}
|
||||
_ => return Err(()),
|
||||
@ -63,15 +94,21 @@ const fn parse_hyphenated(s: &[u8]) -> Result<[u8; 16], ()> {
|
||||
let positions: [u8; 8] = [0, 4, 9, 14, 19, 24, 28, 32];
|
||||
let mut buf: [u8; 16] = [0; 16];
|
||||
let mut j = 0;
|
||||
|
||||
while j < 8 {
|
||||
let i = positions[j];
|
||||
|
||||
// The decoding here is the same as the simple case
|
||||
// We're just dealing with two values instead of one
|
||||
let h1 = HEX_TABLE[s[i as usize] as usize];
|
||||
let h2 = HEX_TABLE[s[(i + 1) as usize] as usize];
|
||||
let h3 = HEX_TABLE[s[(i + 2) as usize] as usize];
|
||||
let h4 = HEX_TABLE[s[(i + 3) as usize] as usize];
|
||||
|
||||
if h1 | h2 | h3 | h4 == 0xff {
|
||||
return Err(());
|
||||
}
|
||||
|
||||
buf[j * 2] = SHL4_TABLE[h1 as usize] | h2;
|
||||
buf[j * 2 + 1] = SHL4_TABLE[h3 as usize] | h4;
|
||||
j += 1;
|
||||
@ -83,6 +120,7 @@ const fn parse_hyphenated(s: &[u8]) -> Result<[u8; 16], ()> {
|
||||
const HEX_TABLE: &[u8; 256] = &{
|
||||
let mut buf = [0; 256];
|
||||
let mut i: u8 = 0;
|
||||
|
||||
loop {
|
||||
buf[i as usize] = match i {
|
||||
b'0'..=b'9' => i - b'0',
|
||||
@ -90,9 +128,11 @@ const HEX_TABLE: &[u8; 256] = &{
|
||||
b'A'..=b'F' => i - b'A' + 10,
|
||||
_ => 0xff,
|
||||
};
|
||||
|
||||
if i == 255 {
|
||||
break buf;
|
||||
}
|
||||
|
||||
i += 1
|
||||
}
|
||||
};
|
||||
@ -100,11 +140,14 @@ const HEX_TABLE: &[u8; 256] = &{
|
||||
/// Lookup table mapping every byte value `b` to `b << 4`, with the bits
/// shifted past the top of the byte discarded.
///
/// Indexing it with a decoded hex digit (`0x0`..=`0xf`) moves that digit
/// into the upper nibble of the result.
const SHL4_TABLE: &[u8; 256] = &{
    let mut buf = [0u8; 256];

    // A `usize` counter can reach 256, so the loop terminates without the
    // explicit break-at-255 that a `u8` counter needs to avoid overflow.
    let mut i: usize = 0;

    while i < 256 {
        // `i` is below 256 here, so the cast to `u8` is lossless.
        buf[i] = (i as u8) << 4;
        i += 1;
    }

    buf
};
|
||||
|
50
src/fmt.rs
50
src/fmt.rs
@ -12,7 +12,7 @@
|
||||
//! Adapters for various formats for UUIDs
|
||||
|
||||
use crate::{
|
||||
std::{borrow::Borrow, fmt, str},
|
||||
std::{borrow::Borrow, fmt, str, ptr},
|
||||
Uuid, Variant,
|
||||
};
|
||||
|
||||
@ -227,12 +227,14 @@ fn encode_simple<'b>(
|
||||
buffer: &'b mut [u8],
|
||||
upper: bool,
|
||||
) -> &'b mut str {
|
||||
const LEN: usize = 32;
|
||||
let buf = &mut buffer[..LEN];
|
||||
let buf = &mut buffer[..Simple::LENGTH];
|
||||
let dst = buf.as_mut_ptr();
|
||||
|
||||
// SAFETY: `buf` is guaranteed to be at least `LEN` bytes
|
||||
// SAFETY: The encoded buffer is ASCII encoded
|
||||
unsafe {
|
||||
let dst = buf.as_mut_ptr();
|
||||
core::ptr::write(dst.cast(), format_simple(src, upper));
|
||||
core::str::from_utf8_unchecked_mut(buf) // SAFETY: ascii encoding
|
||||
ptr::write(dst.cast(), format_simple(src, upper));
|
||||
str::from_utf8_unchecked_mut(buf)
|
||||
}
|
||||
}
|
||||
|
||||
@ -242,12 +244,14 @@ fn encode_hyphenated<'b>(
|
||||
buffer: &'b mut [u8],
|
||||
upper: bool,
|
||||
) -> &'b mut str {
|
||||
const LEN: usize = 36;
|
||||
let buf = &mut buffer[..LEN];
|
||||
let buf = &mut buffer[..Hyphenated::LENGTH];
|
||||
let dst = buf.as_mut_ptr();
|
||||
|
||||
// SAFETY: `buf` is guaranteed to be at least `LEN` bytes
|
||||
// SAFETY: The encoded buffer is ASCII encoded
|
||||
unsafe {
|
||||
let dst = buf.as_mut_ptr();
|
||||
core::ptr::write(dst.cast(), format_hyphenated(src, upper));
|
||||
core::str::from_utf8_unchecked_mut(buf) // SAFETY: ascii encoding
|
||||
ptr::write(dst.cast(), format_hyphenated(src, upper));
|
||||
str::from_utf8_unchecked_mut(buf)
|
||||
}
|
||||
}
|
||||
|
||||
@ -257,14 +261,17 @@ fn encode_braced<'b>(
|
||||
buffer: &'b mut [u8],
|
||||
upper: bool,
|
||||
) -> &'b mut str {
|
||||
const LEN: usize = 38;
|
||||
let buf = &mut buffer[..LEN];
|
||||
let buf = &mut buffer[..Braced::LENGTH];
|
||||
buf[0] = b'{';
|
||||
buf[LEN - 1] = b'}';
|
||||
buf[Braced::LENGTH - 1] = b'}';
|
||||
|
||||
// SAFETY: `buf` is guaranteed to be at least `LEN` bytes
|
||||
// SAFETY: The encoded buffer is ASCII encoded
|
||||
unsafe {
|
||||
let dst = buf.as_mut_ptr().add(1);
|
||||
core::ptr::write(dst.cast(), format_hyphenated(src, upper));
|
||||
core::str::from_utf8_unchecked_mut(buf) // SAFETY: ascii encoding
|
||||
|
||||
ptr::write(dst.cast(), format_hyphenated(src, upper));
|
||||
str::from_utf8_unchecked_mut(buf)
|
||||
}
|
||||
}
|
||||
|
||||
@ -274,13 +281,16 @@ fn encode_urn<'b>(
|
||||
buffer: &'b mut [u8],
|
||||
upper: bool,
|
||||
) -> &'b mut str {
|
||||
const LEN: usize = 45;
|
||||
let buf = &mut buffer[..LEN];
|
||||
let buf = &mut buffer[..Urn::LENGTH];
|
||||
buf[..9].copy_from_slice(b"urn:uuid:");
|
||||
|
||||
// SAFETY: `buf` is guaranteed to be at least `LEN` bytes
|
||||
// SAFETY: The encoded buffer is ASCII encoded
|
||||
unsafe {
|
||||
let dst = buf.as_mut_ptr().add(9);
|
||||
core::ptr::write(dst.cast(), format_hyphenated(src, upper));
|
||||
core::str::from_utf8_unchecked_mut(buf) // SAFETY: ascii encoding
|
||||
|
||||
ptr::write(dst.cast(), format_hyphenated(src, upper));
|
||||
str::from_utf8_unchecked_mut(buf)
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user