Improve WTF-8 comments

This commit is contained in:
Thalia Archibald 2025-02-05 14:03:50 -08:00
parent 5bc6231454
commit 09dc38f23b

View File

@ -156,9 +156,12 @@ impl ops::DerefMut for Wtf8Buf {
}
}
/// Format the string with double quotes,
/// and surrogates as `\u` followed by four hexadecimal digits.
/// Example: `"a\u{D800}"` for a string with code points [U+0061, U+D800]
/// Formats the string in double quotes, with characters escaped according to
/// [`char::escape_debug`] and unpaired surrogates represented as `\u{xxxx}`,
/// where each `x` is a hexadecimal digit.
///
/// For example, the code points [U+0061, U+D800, U+000A] are formatted as
/// `"a\u{D800}\n"`.
impl fmt::Debug for Wtf8Buf {
#[inline]
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
@ -181,7 +184,7 @@ impl Wtf8Buf {
/// Creates a WTF-8 string from a WTF-8 byte vec.
///
/// Since the byte vec is not checked for valid WTF-8, this functions is
/// Since the byte vec is not checked for valid WTF-8, this function is
/// marked unsafe.
#[inline]
pub unsafe fn from_bytes_unchecked(value: Vec<u8>) -> Wtf8Buf {
@ -237,8 +240,9 @@ impl Wtf8Buf {
string
}
/// Copied from String::push
/// Appends the given code point to the end of this string.
/// This does **not** include the WTF-8 concatenation check or `is_known_utf8` check.
/// Copied from String::push.
fn push_code_point_unchecked(&mut self, code_point: CodePoint) {
let mut bytes = [0; 4];
let bytes = encode_utf8_raw(code_point.value, &mut bytes);
@ -264,16 +268,16 @@ impl Wtf8Buf {
///
/// # Panics
///
/// Panics if the new capacity overflows `usize`.
/// Panics if the new capacity exceeds `isize::MAX` bytes.
#[inline]
pub fn reserve(&mut self, additional: usize) {
    // Growth is delegated to the underlying byte buffer.
    let buffer = &mut self.bytes;
    buffer.reserve(additional);
}
/// Tries to reserve capacity for at least `additional` more length units
/// in the given `Wtf8Buf`. The `Wtf8Buf` may reserve more space to avoid
/// frequent reallocations. After calling `try_reserve`, capacity will be
/// greater than or equal to `self.len() + additional`. Does nothing if
/// Tries to reserve capacity for at least `additional` more bytes to be
/// inserted in the given `Wtf8Buf`. The `Wtf8Buf` may reserve more space to
/// avoid frequent reallocations. After calling `try_reserve`, capacity will
/// be greater than or equal to `self.len() + additional`. Does nothing if
/// capacity is already sufficient. This method preserves the contents even
/// if an error occurs.
///
@ -291,8 +295,8 @@ impl Wtf8Buf {
self.bytes.reserve_exact(additional)
}
/// Tries to reserve the minimum capacity for exactly `additional`
/// length units in the given `Wtf8Buf`. After calling
/// Tries to reserve the minimum capacity for exactly `additional` more
/// bytes to be inserted in the given `Wtf8Buf`. After calling
/// `try_reserve_exact`, capacity will be greater than or equal to
/// `self.len() + additional` if it returns `Ok(())`.
/// Does nothing if the capacity is already sufficient.
@ -450,6 +454,8 @@ impl Wtf8Buf {
match self.next_surrogate(pos) {
Some((surrogate_pos, _)) => {
pos = surrogate_pos + 3;
// Surrogates and the replacement character are all 3 bytes,
// so they can be substituted in-place.
self.bytes[surrogate_pos..pos]
.copy_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes());
}
@ -535,9 +541,9 @@ impl AsInner<[u8]> for Wtf8 {
}
}
/// Format the slice with double quotes,
/// and surrogates as `\u` followed by four hexadecimal digits.
/// Example: `"a\u{D800}"` for a slice with code points [U+0061, U+D800]
/// Formats the string in double quotes, with characters escaped according to
/// [`char::escape_debug`] and unpaired surrogates represented as `\u{xxxx}`,
/// where each `x` is a hexadecimal digit.
impl fmt::Debug for Wtf8 {
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
fn write_str_escaped(f: &mut fmt::Formatter<'_>, s: &str) -> fmt::Result {
@ -562,6 +568,8 @@ impl fmt::Debug for Wtf8 {
}
}
/// Formats the string with unpaired surrogates substituted with the replacement
/// character, U+FFFD.
impl fmt::Display for Wtf8 {
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
let wtf8_bytes = &self.bytes;