mirror of
https://github.com/uuid-rs/uuid.git
synced 2025-10-03 15:55:06 +00:00
Merge pull request #563 from KodrAus/chore/parser-docs
Add some inline comments to the new parser impl
This commit is contained in:
commit
f362d7d4f0
@ -51,9 +51,11 @@ impl<'a> InvalidUuid<'a> {
|
|||||||
|
|
||||||
let mut hyphen_count = 0;
|
let mut hyphen_count = 0;
|
||||||
let mut group_bounds = [0; 4];
|
let mut group_bounds = [0; 4];
|
||||||
|
|
||||||
// SAFETY: the byte array came from a valid utf8 string,
|
// SAFETY: the byte array came from a valid utf8 string,
|
||||||
// and is aligned along char boundaries.
|
// and is aligned along char boundaries.
|
||||||
let string = unsafe { std::str::from_utf8_unchecked(s) };
|
let string = unsafe { std::str::from_utf8_unchecked(s) };
|
||||||
|
|
||||||
for (index, character) in string.char_indices() {
|
for (index, character) in string.char_indices() {
|
||||||
let byte = character as u8;
|
let byte = character as u8;
|
||||||
if character as u32 - byte as u32 > 0 {
|
if character as u32 - byte as u32 > 0 {
|
||||||
|
@ -14,15 +14,22 @@ use crate::error::InvalidUuid;
|
|||||||
#[inline]
|
#[inline]
|
||||||
pub const fn try_parse(input: &str) -> Result<[u8; 16], InvalidUuid> {
|
pub const fn try_parse(input: &str) -> Result<[u8; 16], InvalidUuid> {
|
||||||
let result = match (input.len(), input.as_bytes()) {
|
let result = match (input.len(), input.as_bytes()) {
|
||||||
|
// Inputs of 32 bytes must be a non-hyphenated UUID
|
||||||
(32, s) => parse_simple(s),
|
(32, s) => parse_simple(s),
|
||||||
|
// Hyphenated UUIDs may be wrapped in various ways:
|
||||||
|
// - `{UUID}` for braced UUIDs
|
||||||
|
// - `urn:uuid:UUID` for URNs
|
||||||
|
// - `UUID` for a regular hyphenated UUID
|
||||||
(36, s)
|
(36, s)
|
||||||
| (38, [b'{', s @ .., b'}'])
|
| (38, [b'{', s @ .., b'}'])
|
||||||
| (
|
| (
|
||||||
45,
|
45,
|
||||||
[b'u', b'r', b'n', b':', b'u', b'u', b'i', b'd', b':', s @ ..],
|
[b'u', b'r', b'n', b':', b'u', b'u', b'i', b'd', b':', s @ ..],
|
||||||
) => parse_hyphenated(s),
|
) => parse_hyphenated(s),
|
||||||
|
// Any other shaped input is immediately invalid
|
||||||
_ => Err(()),
|
_ => Err(()),
|
||||||
};
|
};
|
||||||
|
|
||||||
match result {
|
match result {
|
||||||
Ok(b) => Ok(b),
|
Ok(b) => Ok(b),
|
||||||
Err(()) => Err(InvalidUuid(input)),
|
Err(()) => Err(InvalidUuid(input)),
|
||||||
@ -31,30 +38,54 @@ pub const fn try_parse(input: &str) -> Result<[u8; 16], InvalidUuid> {
|
|||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
const fn parse_simple(s: &[u8]) -> Result<[u8; 16], ()> {
|
const fn parse_simple(s: &[u8]) -> Result<[u8; 16], ()> {
|
||||||
|
// This length check here removes all other bounds
|
||||||
|
// checks in this function
|
||||||
if s.len() != 32 {
|
if s.len() != 32 {
|
||||||
return Err(());
|
return Err(());
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut buf: [u8; 16] = [0; 16];
|
let mut buf: [u8; 16] = [0; 16];
|
||||||
let mut i = 0;
|
let mut i = 0;
|
||||||
|
|
||||||
while i < 16 {
|
while i < 16 {
|
||||||
|
// Convert a two-char hex value (like `A8`)
|
||||||
|
// into a byte (like `10101000`)
|
||||||
let h1 = HEX_TABLE[s[i * 2] as usize];
|
let h1 = HEX_TABLE[s[i * 2] as usize];
|
||||||
let h2 = HEX_TABLE[s[i * 2 + 1] as usize];
|
let h2 = HEX_TABLE[s[i * 2 + 1] as usize];
|
||||||
|
|
||||||
|
// We use `0xff` as a sentinel value to indicate
|
||||||
|
// an invalid hex character sequence (like the letter `G`)
|
||||||
if h1 | h2 == 0xff {
|
if h1 | h2 == 0xff {
|
||||||
return Err(());
|
return Err(());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The upper nibble needs to be shifted into position
|
||||||
|
// to produce the final byte value
|
||||||
buf[i] = SHL4_TABLE[h1 as usize] | h2;
|
buf[i] = SHL4_TABLE[h1 as usize] | h2;
|
||||||
i += 1;
|
i += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(buf)
|
Ok(buf)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
const fn parse_hyphenated(s: &[u8]) -> Result<[u8; 16], ()> {
|
const fn parse_hyphenated(s: &[u8]) -> Result<[u8; 16], ()> {
|
||||||
|
// This length check here removes all other bounds
|
||||||
|
// checks in this function
|
||||||
if s.len() != 36 {
|
if s.len() != 36 {
|
||||||
return Err(());
|
return Err(());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We look at two hex-encoded values (4 chars) at a time because
|
||||||
|
// that's the size of the smallest group in a hyphenated UUID.
|
||||||
|
// The indexes we're interested in are:
|
||||||
|
//
|
||||||
|
// uuid     : 936da01f-9abd-4d9d-80c7-02af85c822a8
|
||||||
|
//            |   |   ||   ||   ||   ||   |   |
|
||||||
|
// hyphens  : |   |   8|  13|  18|  23|   |   |
|
||||||
|
// positions: 0   4    9    14   19   24  28  32
|
||||||
|
|
||||||
|
// First, ensure the hyphens appear in the right places
|
||||||
match [s[8], s[13], s[18], s[23]] {
|
match [s[8], s[13], s[18], s[23]] {
|
||||||
[b'-', b'-', b'-', b'-'] => {}
|
[b'-', b'-', b'-', b'-'] => {}
|
||||||
_ => return Err(()),
|
_ => return Err(()),
|
||||||
@ -63,15 +94,21 @@ const fn parse_hyphenated(s: &[u8]) -> Result<[u8; 16], ()> {
|
|||||||
let positions: [u8; 8] = [0, 4, 9, 14, 19, 24, 28, 32];
|
let positions: [u8; 8] = [0, 4, 9, 14, 19, 24, 28, 32];
|
||||||
let mut buf: [u8; 16] = [0; 16];
|
let mut buf: [u8; 16] = [0; 16];
|
||||||
let mut j = 0;
|
let mut j = 0;
|
||||||
|
|
||||||
while j < 8 {
|
while j < 8 {
|
||||||
let i = positions[j];
|
let i = positions[j];
|
||||||
|
|
||||||
|
// The decoding here is the same as the simple case
|
||||||
|
// We're just dealing with two values instead of one
|
||||||
let h1 = HEX_TABLE[s[i as usize] as usize];
|
let h1 = HEX_TABLE[s[i as usize] as usize];
|
||||||
let h2 = HEX_TABLE[s[(i + 1) as usize] as usize];
|
let h2 = HEX_TABLE[s[(i + 1) as usize] as usize];
|
||||||
let h3 = HEX_TABLE[s[(i + 2) as usize] as usize];
|
let h3 = HEX_TABLE[s[(i + 2) as usize] as usize];
|
||||||
let h4 = HEX_TABLE[s[(i + 3) as usize] as usize];
|
let h4 = HEX_TABLE[s[(i + 3) as usize] as usize];
|
||||||
|
|
||||||
if h1 | h2 | h3 | h4 == 0xff {
|
if h1 | h2 | h3 | h4 == 0xff {
|
||||||
return Err(());
|
return Err(());
|
||||||
}
|
}
|
||||||
|
|
||||||
buf[j * 2] = SHL4_TABLE[h1 as usize] | h2;
|
buf[j * 2] = SHL4_TABLE[h1 as usize] | h2;
|
||||||
buf[j * 2 + 1] = SHL4_TABLE[h3 as usize] | h4;
|
buf[j * 2 + 1] = SHL4_TABLE[h3 as usize] | h4;
|
||||||
j += 1;
|
j += 1;
|
||||||
@ -83,6 +120,7 @@ const fn parse_hyphenated(s: &[u8]) -> Result<[u8; 16], ()> {
|
|||||||
const HEX_TABLE: &[u8; 256] = &{
|
const HEX_TABLE: &[u8; 256] = &{
|
||||||
let mut buf = [0; 256];
|
let mut buf = [0; 256];
|
||||||
let mut i: u8 = 0;
|
let mut i: u8 = 0;
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
buf[i as usize] = match i {
|
buf[i as usize] = match i {
|
||||||
b'0'..=b'9' => i - b'0',
|
b'0'..=b'9' => i - b'0',
|
||||||
@ -90,9 +128,11 @@ const HEX_TABLE: &[u8; 256] = &{
|
|||||||
b'A'..=b'F' => i - b'A' + 10,
|
b'A'..=b'F' => i - b'A' + 10,
|
||||||
_ => 0xff,
|
_ => 0xff,
|
||||||
};
|
};
|
||||||
|
|
||||||
if i == 255 {
|
if i == 255 {
|
||||||
break buf;
|
break buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
i += 1
|
i += 1
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -100,11 +140,14 @@ const HEX_TABLE: &[u8; 256] = &{
|
|||||||
const SHL4_TABLE: &[u8; 256] = &{
|
const SHL4_TABLE: &[u8; 256] = &{
|
||||||
let mut buf = [0; 256];
|
let mut buf = [0; 256];
|
||||||
let mut i: u8 = 0;
|
let mut i: u8 = 0;
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
buf[i as usize] = i.wrapping_shl(4);
|
buf[i as usize] = i.wrapping_shl(4);
|
||||||
|
|
||||||
if i == 255 {
|
if i == 255 {
|
||||||
break buf;
|
break buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
i += 1;
|
i += 1;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
50
src/fmt.rs
50
src/fmt.rs
@ -12,7 +12,7 @@
|
|||||||
//! Adapters for various formats for UUIDs
|
//! Adapters for various formats for UUIDs
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
std::{borrow::Borrow, fmt, str},
|
std::{borrow::Borrow, fmt, str, ptr},
|
||||||
Uuid, Variant,
|
Uuid, Variant,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -227,12 +227,14 @@ fn encode_simple<'b>(
|
|||||||
buffer: &'b mut [u8],
|
buffer: &'b mut [u8],
|
||||||
upper: bool,
|
upper: bool,
|
||||||
) -> &'b mut str {
|
) -> &'b mut str {
|
||||||
const LEN: usize = 32;
|
let buf = &mut buffer[..Simple::LENGTH];
|
||||||
let buf = &mut buffer[..LEN];
|
|
||||||
unsafe {
|
|
||||||
let dst = buf.as_mut_ptr();
|
let dst = buf.as_mut_ptr();
|
||||||
core::ptr::write(dst.cast(), format_simple(src, upper));
|
|
||||||
core::str::from_utf8_unchecked_mut(buf) // SAFETY: ascii encoding
|
// SAFETY: `buf` is guaranteed to be at least `Simple::LENGTH` bytes
|
||||||
|
// SAFETY: The encoded buffer is ASCII encoded
|
||||||
|
unsafe {
|
||||||
|
ptr::write(dst.cast(), format_simple(src, upper));
|
||||||
|
str::from_utf8_unchecked_mut(buf)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -242,12 +244,14 @@ fn encode_hyphenated<'b>(
|
|||||||
buffer: &'b mut [u8],
|
buffer: &'b mut [u8],
|
||||||
upper: bool,
|
upper: bool,
|
||||||
) -> &'b mut str {
|
) -> &'b mut str {
|
||||||
const LEN: usize = 36;
|
let buf = &mut buffer[..Hyphenated::LENGTH];
|
||||||
let buf = &mut buffer[..LEN];
|
|
||||||
unsafe {
|
|
||||||
let dst = buf.as_mut_ptr();
|
let dst = buf.as_mut_ptr();
|
||||||
core::ptr::write(dst.cast(), format_hyphenated(src, upper));
|
|
||||||
core::str::from_utf8_unchecked_mut(buf) // SAFETY: ascii encoding
|
// SAFETY: `buf` is guaranteed to be at least `Hyphenated::LENGTH` bytes
|
||||||
|
// SAFETY: The encoded buffer is ASCII encoded
|
||||||
|
unsafe {
|
||||||
|
ptr::write(dst.cast(), format_hyphenated(src, upper));
|
||||||
|
str::from_utf8_unchecked_mut(buf)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -257,14 +261,17 @@ fn encode_braced<'b>(
|
|||||||
buffer: &'b mut [u8],
|
buffer: &'b mut [u8],
|
||||||
upper: bool,
|
upper: bool,
|
||||||
) -> &'b mut str {
|
) -> &'b mut str {
|
||||||
const LEN: usize = 38;
|
let buf = &mut buffer[..Braced::LENGTH];
|
||||||
let buf = &mut buffer[..LEN];
|
|
||||||
buf[0] = b'{';
|
buf[0] = b'{';
|
||||||
buf[LEN - 1] = b'}';
|
buf[Braced::LENGTH - 1] = b'}';
|
||||||
|
|
||||||
|
// SAFETY: `buf` is guaranteed to be at least `Braced::LENGTH` bytes
|
||||||
|
// SAFETY: The encoded buffer is ASCII encoded
|
||||||
unsafe {
|
unsafe {
|
||||||
let dst = buf.as_mut_ptr().add(1);
|
let dst = buf.as_mut_ptr().add(1);
|
||||||
core::ptr::write(dst.cast(), format_hyphenated(src, upper));
|
|
||||||
core::str::from_utf8_unchecked_mut(buf) // SAFETY: ascii encoding
|
ptr::write(dst.cast(), format_hyphenated(src, upper));
|
||||||
|
str::from_utf8_unchecked_mut(buf)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -274,13 +281,16 @@ fn encode_urn<'b>(
|
|||||||
buffer: &'b mut [u8],
|
buffer: &'b mut [u8],
|
||||||
upper: bool,
|
upper: bool,
|
||||||
) -> &'b mut str {
|
) -> &'b mut str {
|
||||||
const LEN: usize = 45;
|
let buf = &mut buffer[..Urn::LENGTH];
|
||||||
let buf = &mut buffer[..LEN];
|
|
||||||
buf[..9].copy_from_slice(b"urn:uuid:");
|
buf[..9].copy_from_slice(b"urn:uuid:");
|
||||||
|
|
||||||
|
// SAFETY: `buf` is guaranteed to be at least `Urn::LENGTH` bytes
|
||||||
|
// SAFETY: The encoded buffer is ASCII encoded
|
||||||
unsafe {
|
unsafe {
|
||||||
let dst = buf.as_mut_ptr().add(9);
|
let dst = buf.as_mut_ptr().add(9);
|
||||||
core::ptr::write(dst.cast(), format_hyphenated(src, upper));
|
|
||||||
core::str::from_utf8_unchecked_mut(buf) // SAFETY: ascii encoding
|
ptr::write(dst.cast(), format_hyphenated(src, upper));
|
||||||
|
str::from_utf8_unchecked_mut(buf)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user