mirror of
https://github.com/rust-lang/rust.git
synced 2026-01-20 14:30:39 +00:00
Increase vtable layout size
This improves LLVM's codegen by allowing vtable loads to be hoisted out of loops (as just one example). The calculation here is an under-approximation, but it works for simple trait hierarchies (e.g., `FnMut` will be improved). We have a runtime assert that the approximation is accurate, so there's no risk of UB as a result of getting this wrong.
```rust
#[no_mangle]
pub fn foo(elements: &[u32], callback: &mut dyn Callback) {
for element in elements.iter() {
if *element != 0 {
callback.call(*element);
}
}
}
pub trait Callback {
fn call(&mut self, _: u32);
}
```
Simplifying a bit (e.g., numbering ends up different):
```diff
; Function Attrs: nonlazybind uwtable
-define void @foo(ptr noalias noundef nonnull readonly align 4 %elements.0, i64 noundef %elements.1, ptr noundef nonnull align 1 %callback.0, ptr noalias nocapture noundef readonly align 8 dereferenceable(24) %callback.1) unnamed_addr #0 {
+define void @foo(ptr noalias noundef nonnull readonly align 4 %elements.0, i64 noundef %elements.1, ptr noundef nonnull align 1 %callback.0, ptr noalias nocapture noundef readonly align 8 dereferenceable(32) %callback.1) unnamed_addr #0 {
start:
%_15 = getelementptr inbounds i32, ptr %elements.0, i64 %elements.1
@@ -13,4 +13,5 @@
bb4.lr.ph: ; preds = %start
%1 = getelementptr inbounds i8, ptr %callback.1, i64 24
+ %2 = load ptr, ptr %1, align 8, !nonnull !3
br label %bb4
bb6: ; preds = %bb4
- %4 = load ptr, ptr %1, align 8, !invariant.load !3, !nonnull !3
- tail call void %4(ptr noundef nonnull align 1 %callback.0, i32 noundef %_9)
+ tail call void %2(ptr noundef nonnull align 1 %callback.0, i32 noundef %_9)
br label %bb7
}
```