mirror of
https://github.com/rust-lang/rust.git
synced 2025-10-06 04:06:31 +00:00

It modernizes the coding style of the crate stdarch-test by fixing Clippy warnings. Clippy: rust version 1.89.0-nightly (6f6971078 2025-05-28) Number of Fixed Warnings: 1/1
219 lines
8.2 KiB
Rust
219 lines
8.2 KiB
Rust
//! Runtime support needed for testing the stdarch crate.
|
|
//!
|
|
//! This basically just disassembles the current executable and then parses the
|
|
//! output once globally and then provides the `assert` function which makes
|
|
//! assertions about the disassembly of a function.
|
|
#![deny(rust_2018_idioms)]
|
|
#![allow(clippy::missing_docs_in_private_items, clippy::print_stdout)]
|
|
|
|
#[macro_use]
|
|
extern crate lazy_static;
|
|
#[macro_use]
|
|
extern crate cfg_if;
|
|
|
|
pub use assert_instr_macro::*;
|
|
pub use simd_test_macro::*;
|
|
use std::{cmp, collections::HashSet, env, hash, hint::black_box, str};
|
|
|
|
cfg_if! {
|
|
if #[cfg(target_arch = "wasm32")] {
|
|
pub mod wasm;
|
|
use wasm::disassemble_myself;
|
|
} else {
|
|
mod disassembly;
|
|
use crate::disassembly::disassemble_myself;
|
|
}
|
|
}
|
|
|
|
lazy_static! {
|
|
static ref DISASSEMBLY: HashSet<Function> = disassemble_myself();
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
struct Function {
|
|
name: String,
|
|
instrs: Vec<String>,
|
|
}
|
|
impl Function {
|
|
fn new(n: &str) -> Self {
|
|
Self {
|
|
name: n.to_string(),
|
|
instrs: Vec::new(),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl cmp::PartialEq for Function {
|
|
fn eq(&self, other: &Self) -> bool {
|
|
self.name == other.name
|
|
}
|
|
}
|
|
impl cmp::Eq for Function {}
|
|
|
|
impl hash::Hash for Function {
|
|
fn hash<H: hash::Hasher>(&self, state: &mut H) {
|
|
self.name.hash(state)
|
|
}
|
|
}
|
|
|
|
/// Main entry point for this crate, called by the `#[assert_instr]` macro.
|
|
///
|
|
/// This asserts that the function at `fnptr` contains the instruction
|
|
/// `expected` provided.
|
|
pub fn assert(shim_addr: usize, fnname: &str, expected: &str) {
|
|
// Make sure that the shim is not removed
|
|
black_box(shim_addr);
|
|
|
|
//eprintln!("shim name: {fnname}");
|
|
let function = &DISASSEMBLY
|
|
.get(&Function::new(fnname))
|
|
.unwrap_or_else(|| panic!("function \"{fnname}\" not found in the disassembly"));
|
|
//eprintln!(" function: {:?}", function);
|
|
|
|
let mut instrs = &function.instrs[..];
|
|
while instrs.last().is_some_and(|s| s == "nop" || s == "int3") {
|
|
instrs = &instrs[..instrs.len() - 1];
|
|
}
|
|
|
|
// Look for `expected` as the first part of any instruction in this
|
|
// function, e.g., tzcntl in tzcntl %rax,%rax.
|
|
//
|
|
// There are two cases when the expected instruction is nop:
|
|
// 1. The expected intrinsic is compiled away so we can't
|
|
// check for it - aka the intrinsic is not generating any code.
|
|
// 2. It is a mark, indicating that the instruction will be
|
|
// compiled into other instructions - mainly because of llvm
|
|
// optimization.
|
|
let expected = if expected == "unknown" {
|
|
"<unknown>" // Workaround for rust-lang/stdarch#1674, todo: remove when the issue is fixed
|
|
} else {
|
|
expected
|
|
};
|
|
let found = expected == "nop" || instrs.iter().any(|s| s.starts_with(expected));
|
|
|
|
// Look for subroutine call instructions in the disassembly to detect whether
|
|
// inlining failed: all intrinsics are `#[inline(always)]`, so calling one
|
|
// intrinsic from another should not generate subroutine call instructions.
|
|
let inlining_failed = if cfg!(target_arch = "x86_64") || cfg!(target_arch = "wasm32") {
|
|
instrs.iter().any(|s| s.starts_with("call "))
|
|
} else if cfg!(target_arch = "x86") {
|
|
instrs.windows(2).any(|s| {
|
|
// On 32-bit x86 position independent code will call itself and be
|
|
// immediately followed by a `pop` to learn about the current address.
|
|
// Let's not take that into account when considering whether a function
|
|
// failed inlining something.
|
|
s[0].starts_with("call ") && s[1].starts_with("pop") // FIXME: original logic but does not match comment
|
|
})
|
|
} else if cfg!(any(
|
|
target_arch = "aarch64",
|
|
target_arch = "arm64ec",
|
|
target_arch = "powerpc",
|
|
target_arch = "powerpc64"
|
|
)) {
|
|
instrs.iter().any(|s| s.starts_with("bl "))
|
|
} else {
|
|
// FIXME: Add detection for other archs
|
|
false
|
|
};
|
|
|
|
let instruction_limit = std::env::var("STDARCH_ASSERT_INSTR_LIMIT")
|
|
.ok()
|
|
.map_or_else(
|
|
|| match expected {
|
|
// `cpuid` returns a pretty big aggregate structure, so exempt
|
|
// it from the slightly more restrictive 22 instructions below.
|
|
"cpuid" => 30,
|
|
|
|
// These require 8 loads and stores, so it _just_ overflows the limit
|
|
"aesencwide128kl" | "aesencwide256kl" | "aesdecwide128kl" | "aesdecwide256kl" => 24,
|
|
|
|
// Apparently, on Windows, LLVM generates a bunch of
|
|
// saves/restores of xmm registers around these instructions,
|
|
// which exceeds the limit of 20 below. As it seems dictated by
|
|
// Windows's ABI (I believe?), we probably can't do much
|
|
// about it.
|
|
"vzeroall" | "vzeroupper" if cfg!(windows) => 30,
|
|
|
|
// Intrinsics using `cvtpi2ps` are typically "composites" and
|
|
// in some cases exceed the limit.
|
|
"cvtpi2ps" => 25,
|
|
// core_arch/src/arm_shared/simd32
|
|
// vfmaq_n_f32_vfma : #instructions = 26 >= 22 (limit)
|
|
"usad8" | "vfma" | "vfms" => 27,
|
|
"qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8" | "ssub8" => 29,
|
|
// core_arch/src/arm_shared/simd32
|
|
// vst1q_s64_x4_vst1 : #instructions = 27 >= 22 (limit)
|
|
"vld3" => 28,
|
|
// core_arch/src/arm_shared/simd32
|
|
// vld4q_lane_u32_vld4 : #instructions = 36 >= 22 (limit)
|
|
"vld4" => 37,
|
|
// core_arch/src/arm_shared/simd32
|
|
// vst1q_s64_x4_vst1 : #instructions = 40 >= 22 (limit)
|
|
"vst1" => 41,
|
|
// core_arch/src/arm_shared/simd32
|
|
// vst3q_u32_vst3 : #instructions = 25 >= 22 (limit)
|
|
"vst3" => 26,
|
|
// core_arch/src/arm_shared/simd32
|
|
// vst4q_u32_vst4 : #instructions = 33 >= 22 (limit)
|
|
"vst4" => 34,
|
|
|
|
// core_arch/src/arm_shared/simd32
|
|
// vst1q_p64_x4_nop : #instructions = 33 >= 22 (limit)
|
|
"nop" if fnname.contains("vst1q_p64") => 34,
|
|
|
|
// Original limit was 20 instructions, but ARM DSP Intrinsics
|
|
// are exactly 20 instructions long. So, bump the limit to 22
|
|
// instead of adding here a long list of exceptions.
|
|
_ => {
|
|
// aarch64_be may add reverse instructions which increases
|
|
// the number of instructions generated.
|
|
if cfg!(all(target_endian = "big", target_arch = "aarch64")) {
|
|
32
|
|
} else {
|
|
22
|
|
}
|
|
}
|
|
},
|
|
|v| v.parse().unwrap(),
|
|
);
|
|
let probably_only_one_instruction = instrs.len() < instruction_limit;
|
|
|
|
if found && probably_only_one_instruction && !inlining_failed {
|
|
return;
|
|
}
|
|
|
|
// Help debug by printing out the found disassembly, and then panic as we
|
|
// didn't find the instruction.
|
|
println!("disassembly for {fnname}: ",);
|
|
for (i, instr) in instrs.iter().enumerate() {
|
|
println!("\t{i:2}: {instr}");
|
|
}
|
|
|
|
if !found {
|
|
panic!("failed to find instruction `{expected}` in the disassembly");
|
|
} else if !probably_only_one_instruction {
|
|
panic!(
|
|
"instruction found, but the disassembly contains too many \
|
|
instructions: #instructions = {} >= {} (limit)",
|
|
instrs.len(),
|
|
instruction_limit
|
|
);
|
|
} else if inlining_failed {
|
|
panic!(
|
|
"instruction found, but the disassembly contains subroutine \
|
|
call instructions, which hint that inlining failed"
|
|
);
|
|
}
|
|
}
|
|
|
|
pub fn assert_skip_test_ok(name: &str, missing_features: &[&str]) {
|
|
println!("Skipping test `{name}` due to missing target features:");
|
|
for feature in missing_features {
|
|
println!(" - {feature}");
|
|
}
|
|
match env::var("STDARCH_TEST_EVERYTHING") {
|
|
Ok(_) => panic!("skipped test `{name}` when it shouldn't be skipped"),
|
|
Err(_) => println!("Set STDARCH_TEST_EVERYTHING to make this an error."),
|
|
}
|
|
}
|