mirror of
https://github.com/rust-embedded/heapless.git
synced 2025-09-27 20:40:32 +00:00
optimize the codegen of Vec::clone
these changes optimize `Vec<u8, 1024>::clone` down to these operations 1. reserve the stack space (1028 bytes on 32-bit ARM) and leave it uninitialized 2. zero the `len` field 3. memcpy `len` bytes of data from the parent analyzed source code ``` rust use heapless::Vec; fn clone(vec: &Vec<u8, 1024>) { let mut vec = vec.clone(); black_box(&mut vec); } fn black_box<T>(val: &mut T) { unsafe { asm!("// {0}", in(reg) val) } } ``` machine code with `lto = fat`, `codegen-units = 1` and `opt-level = 'z'` ('z' instead of 3 to avoid loop unrolling and keep the machine code readable) ``` armasm 00020100 <clone>: 20100: b5d0 push {r4, r6, r7, lr} 20102: af02 add r7, sp, #8 20104: f5ad 6d81 sub.w sp, sp, #1032 ; 0x408 20108: 2300 movs r3, #0 2010a: c802 ldmia r0!, {r1} 2010c: 9301 str r3, [sp, #4] 2010e: aa01 add r2, sp, #4 20110: /--/-X b141 cbz r1, 20124 <clone+0x24> 20112: | | 4413 add r3, r2 20114: | | f810 4b01 ldrb.w r4, [r0], #1 20118: | | 3901 subs r1, #1 2011a: | | 711c strb r4, [r3, #4] 2011c: | | 9b01 ldr r3, [sp, #4] 2011e: | | 3301 adds r3, #1 20120: | | 9301 str r3, [sp, #4] 20122: | \-- e7f5 b.n 20110 <clone+0x10> 20124: \----> a801 add r0, sp, #4 20126: f50d 6d81 add.w sp, sp, #1032 ; 0x408 2012a: bdd0 pop {r4, r6, r7, pc} ``` note that it's not optimizing step (3) to an actual `memcpy` because we lack the 'trait specialization' code that libstd uses --- before `clone` was optimized to 1. reserve and zero (`memclr`) 1028 (!?) bytes of stack space 2. (unnecessarily) runtime check if `len` is equal or less than 1024 (capacity) -- this included a panicking branch 3. memcpy `len` bytes of data from the parent
This commit is contained in:
parent
aacc35997b
commit
b2268e7885
@@ -16,6 +16,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
 
 ### Changed
 
+* Optimize the codegen of `Vec::clone`
+
 ### Fixed
 
 * Inserting an item that replaces an already present item will no longer
src/vec.rs (19 changed lines)
@@ -34,12 +34,18 @@ use hash32;
 /// assert_eq!(*vec, [7, 1, 2, 3]);
 /// ```
 pub struct Vec<T, const N: usize> {
-    buffer: [MaybeUninit<T>; N],
+    // NOTE order is important for optimizations. the `len` first layout lets the compiler optimize
+    // `new` to: reserve stack space and zero the first word. With the fields in the reverse order
+    // the compiler optimizes `new` to `memclr`-ing the *entire* stack space, including the `buffer`
+    // field which should be left uninitialized. Optimizations were last checked with Rust 1.60
     len: usize,
+
+    buffer: [MaybeUninit<T>; N],
 }
 
 impl<T, const N: usize> Vec<T, N> {
-    const INIT: MaybeUninit<T> = MaybeUninit::uninit();
+    const ELEM: MaybeUninit<T> = MaybeUninit::uninit();
+    const INIT: [MaybeUninit<T>; N] = [Self::ELEM; N]; // important for optimization of `new`
 
     /// Constructs a new, empty vector with a fixed capacity of `N`
     ///
@@ -60,8 +66,8 @@ impl<T, const N: usize> Vec<T, N> {
         crate::sealed::greater_than_eq_0::<N>();
 
         Self {
-            buffer: [Self::INIT; N],
             len: 0,
+            buffer: Self::INIT,
         }
     }
 
@@ -92,7 +98,12 @@ impl<T, const N: usize> Vec<T, N> {
         T: Clone,
     {
         let mut new = Self::new();
-        new.extend_from_slice(self.as_slice()).unwrap();
+        // avoid `extend_from_slice` as that introduces a runtime check / panicking branch
+        for elem in self {
+            unsafe {
+                new.push_unchecked(elem.clone());
+            }
+        }
         new
     }
 
|
Loading…
x
Reference in New Issue
Block a user