mirror of
				https://github.com/rust-lang/rust.git
				synced 2025-10-31 13:04:42 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			117 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			Rust
		
	
	
	
	
	
			
		
		
	
	
			117 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			Rust
		
	
	
	
	
	
| //@ compile-flags: -Copt-level=3
 | |
| //@ only-x86_64
 | |
| 
 | |
| #![crate_type = "lib"]
 | |
| 
 | |
| use std::mem::swap;
 | |
| use std::ptr::{copy_nonoverlapping, read, write};
 | |
| 
 | |
/// A 5x5 grid of `u64` lanes (25 * 8 = 200 bytes).
///
/// Used by the tests below as the "large value" being swapped.
type KeccakBuffer = [[u64; 5]; 5];
 | |
| 
 | |
// A basic read+copy+write swap implementation ends up copying one of the values
// to stack for large types, which is completely unnecessary as the lack of
// overlap means we can just do whatever fits in registers at a time.
 | |
| 
 | |
// The tests here (after the first one showing that the problem still exists)
// are less about testing *exactly* what the codegen is, and more about testing
// 1) That things are swapped directly from one argument to the other,
//    never going through stack along the way, and
// 2) That we're doing the swapping for big things using large vector types,
//    rather than `i64` or `<8 x i8>` (or, even worse, `i8`) at a time.
//
// (There are separate tests for intrinsics::typed_swap_nonoverlapping that
//  check that it, as an intrinsic, is emitting exactly what it should.)
 | |
| 
 | |
| // CHECK-LABEL: @swap_basic
 | |
| #[no_mangle]
 | |
| pub fn swap_basic(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
 | |
|     // CHECK: alloca [200 x i8]
 | |
| 
 | |
|     // SAFETY: exclusive references are always valid to read/write,
 | |
|     // are non-overlapping, and nothing here panics so it's drop-safe.
 | |
|     unsafe {
 | |
|         let z = read(x);
 | |
|         copy_nonoverlapping(y, x, 1);
 | |
|         write(y, z);
 | |
|     }
 | |
| }
 | |
| 
 | |
| // CHECK-LABEL: @swap_std
 | |
| #[no_mangle]
 | |
| pub fn swap_std(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
 | |
|     // CHECK-NOT: alloca
 | |
|     // CHECK: load <{{2|4}} x i64>
 | |
|     // CHECK: store <{{2|4}} x i64>
 | |
|     swap(x, y)
 | |
| }
 | |
| 
 | |
| // CHECK-LABEL: @swap_slice
 | |
| #[no_mangle]
 | |
| pub fn swap_slice(x: &mut [KeccakBuffer], y: &mut [KeccakBuffer]) {
 | |
|     // CHECK-NOT: alloca
 | |
|     // CHECK: load <{{2|4}} x i64>
 | |
|     // CHECK: store <{{2|4}} x i64>
 | |
|     if x.len() == y.len() {
 | |
|         x.swap_with_slice(y);
 | |
|     }
 | |
| }
 | |
| 
 | |
/// A 1024-byte array of `u8`, so it is large but only byte-aligned.
type OneKilobyteBuffer = [u8; 1024];
 | |
| 
 | |
| // CHECK-LABEL: @swap_1kb_slices
 | |
| #[no_mangle]
 | |
| pub fn swap_1kb_slices(x: &mut [OneKilobyteBuffer], y: &mut [OneKilobyteBuffer]) {
 | |
|     // CHECK-NOT: alloca
 | |
| 
 | |
|     // CHECK-NOT: load i32
 | |
|     // CHECK-NOT: store i32
 | |
|     // CHECK-NOT: load i16
 | |
|     // CHECK-NOT: store i16
 | |
|     // CHECK-NOT: load i8
 | |
|     // CHECK-NOT: store i8
 | |
| 
 | |
|     // CHECK: load <{{2|4}} x i64>{{.+}}align 1,
 | |
|     // CHECK: store <{{2|4}} x i64>{{.+}}align 1,
 | |
| 
 | |
|     // CHECK-NOT: load i32
 | |
|     // CHECK-NOT: store i32
 | |
|     // CHECK-NOT: load i16
 | |
|     // CHECK-NOT: store i16
 | |
|     // CHECK-NOT: load i8
 | |
|     // CHECK-NOT: store i8
 | |
| 
 | |
|     if x.len() == y.len() {
 | |
|         x.swap_with_slice(y);
 | |
|     }
 | |
| }
 | |
| 
 | |
/// A 192-byte (`64 * 3`) byte array forced to 64-byte alignment.
#[repr(align(64))]
pub struct BigButHighlyAligned([u8; 64 * 3]);
 | |
| 
 | |
| // CHECK-LABEL: @swap_big_aligned
 | |
| #[no_mangle]
 | |
| pub fn swap_big_aligned(x: &mut BigButHighlyAligned, y: &mut BigButHighlyAligned) {
 | |
|     // CHECK-NOT: call void @llvm.memcpy
 | |
|     // CHECK-NOT: load i32
 | |
|     // CHECK-NOT: store i32
 | |
|     // CHECK-NOT: load i16
 | |
|     // CHECK-NOT: store i16
 | |
|     // CHECK-NOT: load i8
 | |
|     // CHECK-NOT: store i8
 | |
| 
 | |
|     // CHECK-COUNT-2: load <{{2|4}} x i64>{{.+}}align 64,
 | |
|     // CHECK-COUNT-2: store <{{2|4}} x i64>{{.+}}align 64,
 | |
| 
 | |
|     // CHECK-COUNT-2: load <{{2|4}} x i64>{{.+}}align 32,
 | |
|     // CHECK-COUNT-2: store <{{2|4}} x i64>{{.+}}align 32,
 | |
| 
 | |
|     // CHECK-NOT: load i32
 | |
|     // CHECK-NOT: store i32
 | |
|     // CHECK-NOT: load i16
 | |
|     // CHECK-NOT: store i16
 | |
|     // CHECK-NOT: load i8
 | |
|     // CHECK-NOT: store i8
 | |
|     // CHECK-NOT: call void @llvm.memcpy
 | |
|     swap(x, y)
 | |
| }
 | 
