mirror of
https://github.com/rust-lang/rust.git
synced 2026-03-19 11:09:32 +00:00
Auto merge of #94381 - Kobzol:llvm-bolt, r=Mark-Simulacrum
Use BOLT in CI to optimize LLVM This PR adds an optimization step in the Linux `dist` CI pipeline that uses [BOLT](https://github.com/llvm/llvm-project/tree/main/bolt) to optimize the `libLLVM.so` library built by bootstrap. Steps: - [x] Use LLVM 15 as a bootstrap compiler and use it to build BOLT - [x] Compile LLVM with support for relocations (`-DCMAKE_SHARED_LINKER_FLAGS="-Wl,-q"`) - [x] Gather profile data using instrumented LLVM - [x] Apply profile to LLVM that has already been PGOfied - [x] Run with BOLT profiling on more benchmarks - [x] Decide on the order of optimization (PGO -> BOLT?) - [x] Decide how we should get `bolt` (currently we use the host `bolt`) - [x] Clean up The latest perf results can be found [here](https://github.com/rust-lang/rust/pull/94381#issuecomment-1258269440). The current CI build time with BOLT applied is around 1h 55 minutes.
This commit is contained in:
71
src/bootstrap/bolt.rs
Normal file
71
src/bootstrap/bolt.rs
Normal file
@@ -0,0 +1,71 @@
|
||||
use std::path::Path;
|
||||
use std::process::Command;
|
||||
|
||||
/// Uses the `llvm-bolt` binary to instrument the binary/library at the given `path` with BOLT.
///
/// `llvm-bolt` writes the instrumented artifact into a temporary file, which is then copied
/// over `path` and deleted, so the artifact is effectively modified in place.
/// When the instrumented artifact is executed, it will generate BOLT profiles into
/// `/tmp/prof.fdata.<pid>.fdata`.
///
/// # Panics
///
/// Panics if `llvm-bolt` cannot be started or exits unsuccessfully, or if the instrumented
/// artifact cannot be copied over `path` or removed from the temporary directory afterwards.
pub fn instrument_with_bolt_inplace(path: &Path) {
    // The temporary directory is shared between processes, so include our PID in the file name
    // to keep concurrent invocations from overwriting each other's output.
    let instrumented_path =
        std::env::temp_dir().join(format!("instrumented-{}.so", std::process::id()));

    let status = Command::new("llvm-bolt")
        .arg("-instrument")
        .arg(path)
        // Make sure that each process will write its profiles into a separate file
        .arg("--instrumentation-file-append-pid")
        .arg("-o")
        .arg(&instrumented_path)
        .status()
        .expect("Could not instrument artifact using BOLT");

    if !status.success() {
        panic!("Could not instrument {} with BOLT, exit code {:?}", path.display(), status.code());
    }

    // Replace the original artifact with the instrumented one and clean up the temporary file.
    std::fs::copy(&instrumented_path, path).expect("Cannot copy instrumented artifact");
    std::fs::remove_file(instrumented_path).expect("Cannot delete instrumented artifact");
}
|
||||
|
||||
/// Uses the `llvm-bolt` binary to optimize the binary/library at the given `path` with BOLT,
/// using merged profiles from `profile_path`.
///
/// The recorded profiles have to be merged using the `merge-fdata` tool from LLVM and the merged
/// profile path should be then passed to this function.
///
/// `llvm-bolt` writes the optimized artifact into a temporary file, which is then copied over
/// `path` and deleted, so the artifact is effectively modified in place.
///
/// # Panics
///
/// Panics if `llvm-bolt` cannot be started or exits unsuccessfully, or if the optimized
/// artifact cannot be copied over `path` or removed from the temporary directory afterwards.
pub fn optimize_library_with_bolt_inplace(path: &Path, profile_path: &Path) {
    // The temporary directory is shared between processes, so include our PID in the file name
    // to keep concurrent invocations from overwriting each other's output.
    let optimized_path =
        std::env::temp_dir().join(format!("optimized-{}.so", std::process::id()));

    let status = Command::new("llvm-bolt")
        .arg(path)
        .arg("-data")
        .arg(profile_path)
        .arg("-o")
        .arg(&optimized_path)
        // Reorder basic blocks within functions
        .arg("-reorder-blocks=ext-tsp")
        // Reorder functions within the binary
        .arg("-reorder-functions=hfsort+")
        // Split function code into hot and cold regions
        .arg("-split-functions=2")
        // Split as many basic blocks as possible
        .arg("-split-all-cold")
        // Move jump tables to a separate section
        .arg("-jump-tables=move")
        // Use GNU_STACK program header for new segment (workaround for issues with strip/objcopy)
        .arg("-use-gnu-stack")
        // Fold functions with identical code
        .arg("-icf=1")
        // Update DWARF debug info in the final binary
        .arg("-update-debug-sections")
        // Print optimization statistics
        .arg("-dyno-stats")
        .status()
        .expect("Could not optimize artifact using BOLT");

    if !status.success() {
        panic!("Could not optimize {} with BOLT, exit code {:?}", path.display(), status.code());
    }

    // Replace the original artifact with the optimized one and clean up the temporary file.
    std::fs::copy(&optimized_path, path).expect("Cannot copy optimized artifact");
    std::fs::remove_file(optimized_path).expect("Cannot delete optimized artifact");
}
|
||||
@@ -161,6 +161,8 @@ pub struct Config {
|
||||
pub llvm_profile_use: Option<String>,
|
||||
pub llvm_profile_generate: bool,
|
||||
pub llvm_libunwind_default: Option<LlvmLibunwind>,
|
||||
pub llvm_bolt_profile_generate: bool,
|
||||
pub llvm_bolt_profile_use: Option<String>,
|
||||
|
||||
pub build: TargetSelection,
|
||||
pub hosts: Vec<TargetSelection>,
|
||||
@@ -806,6 +808,15 @@ impl Config {
|
||||
}
|
||||
config.llvm_profile_use = flags.llvm_profile_use;
|
||||
config.llvm_profile_generate = flags.llvm_profile_generate;
|
||||
config.llvm_bolt_profile_generate = flags.llvm_bolt_profile_generate;
|
||||
config.llvm_bolt_profile_use = flags.llvm_bolt_profile_use;
|
||||
|
||||
if config.llvm_bolt_profile_generate && config.llvm_bolt_profile_use.is_some() {
|
||||
eprintln!(
|
||||
"Cannot use both `llvm_bolt_profile_generate` and `llvm_bolt_profile_use` at the same time"
|
||||
);
|
||||
crate::detail_exit(1);
|
||||
}
|
||||
|
||||
// Infer the rest of the configuration.
|
||||
|
||||
|
||||
@@ -2159,6 +2159,10 @@ impl Step for ReproducibleArtifacts {
|
||||
tarball.add_file(path, ".", 0o644);
|
||||
added_anything = true;
|
||||
}
|
||||
if let Some(path) = builder.config.llvm_bolt_profile_use.as_ref() {
|
||||
tarball.add_file(path, ".", 0o644);
|
||||
added_anything = true;
|
||||
}
|
||||
if added_anything { Some(tarball.generate()) } else { None }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -78,6 +78,8 @@ pub struct Flags {
|
||||
//
|
||||
// llvm_out/build/profiles/ is the location this writes to.
|
||||
pub llvm_profile_generate: bool,
|
||||
pub llvm_bolt_profile_generate: bool,
|
||||
pub llvm_bolt_profile_use: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -255,6 +257,8 @@ To learn more about a subcommand, run `./x.py <subcommand> -h`",
|
||||
opts.optmulti("D", "", "deny certain clippy lints", "OPT");
|
||||
opts.optmulti("W", "", "warn about certain clippy lints", "OPT");
|
||||
opts.optmulti("F", "", "forbid certain clippy lints", "OPT");
|
||||
opts.optflag("", "llvm-bolt-profile-generate", "generate BOLT profile for LLVM build");
|
||||
opts.optopt("", "llvm-bolt-profile-use", "use BOLT profile for LLVM build", "PROFILE");
|
||||
|
||||
// We can't use getopt to parse the options until we have completed specifying which
|
||||
// options are valid, but under the current implementation, some options are conditional on
|
||||
@@ -691,6 +695,8 @@ Arguments:
|
||||
rust_profile_generate: matches.opt_str("rust-profile-generate"),
|
||||
llvm_profile_use: matches.opt_str("llvm-profile-use"),
|
||||
llvm_profile_generate: matches.opt_present("llvm-profile-generate"),
|
||||
llvm_bolt_profile_generate: matches.opt_present("llvm-bolt-profile-generate"),
|
||||
llvm_bolt_profile_use: matches.opt_str("llvm-bolt-profile-use"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -122,6 +122,7 @@ use crate::util::{
|
||||
check_run, exe, libdir, mtime, output, run, run_suppressed, try_run, try_run_suppressed, CiEnv,
|
||||
};
|
||||
|
||||
mod bolt;
|
||||
mod builder;
|
||||
mod cache;
|
||||
mod cc_detect;
|
||||
|
||||
@@ -16,6 +16,7 @@ use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::Command;
|
||||
|
||||
use crate::bolt::{instrument_with_bolt_inplace, optimize_library_with_bolt_inplace};
|
||||
use crate::builder::{Builder, RunConfig, ShouldRun, Step};
|
||||
use crate::channel;
|
||||
use crate::config::TargetSelection;
|
||||
@@ -403,6 +404,12 @@ impl Step for Llvm {
|
||||
if let Some(path) = builder.config.llvm_profile_use.as_ref() {
|
||||
cfg.define("LLVM_PROFDATA_FILE", &path);
|
||||
}
|
||||
if builder.config.llvm_bolt_profile_generate
|
||||
|| builder.config.llvm_bolt_profile_use.is_some()
|
||||
{
|
||||
// Relocations are required for BOLT to work.
|
||||
ldflags.push_all("-Wl,-q");
|
||||
}
|
||||
|
||||
// Disable zstd to avoid a dependency on libzstd.so.
|
||||
cfg.define("LLVM_ENABLE_ZSTD", "OFF");
|
||||
@@ -571,12 +578,34 @@ impl Step for Llvm {
|
||||
}
|
||||
}
|
||||
|
||||
// After LLVM is built, we modify (instrument or optimize) the libLLVM.so library file
|
||||
// in place. This is fine, because currently we do not support incrementally rebuilding
|
||||
// LLVM after a configuration change, so to rebuild it the build files have to be removed,
|
||||
// which will also remove these modified files.
|
||||
if builder.config.llvm_bolt_profile_generate {
|
||||
instrument_with_bolt_inplace(&get_built_llvm_lib_path(&build_llvm_config));
|
||||
}
|
||||
if let Some(path) = &builder.config.llvm_bolt_profile_use {
|
||||
optimize_library_with_bolt_inplace(
|
||||
&get_built_llvm_lib_path(&build_llvm_config),
|
||||
&Path::new(path),
|
||||
);
|
||||
}
|
||||
|
||||
t!(stamp.write());
|
||||
|
||||
build_llvm_config
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns path to a built LLVM library (libLLVM.so).
|
||||
/// Assumes that we have built LLVM into a single library file.
|
||||
fn get_built_llvm_lib_path(llvm_config_path: &Path) -> PathBuf {
|
||||
let mut cmd = Command::new(llvm_config_path);
|
||||
cmd.arg("--libfiles");
|
||||
PathBuf::from(output(&mut cmd).trim())
|
||||
}
|
||||
|
||||
fn check_llvm_version(builder: &Builder<'_>, llvm_config: &Path) {
|
||||
if !builder.config.llvm_version_check {
|
||||
return;
|
||||
|
||||
@@ -22,7 +22,7 @@ INC="/rustroot/include:/usr/include"
|
||||
|
||||
# We need compiler-rt for the profile runtime (used later to PGO the LLVM build)
|
||||
# but sanitizers aren't currently building. Since we don't need those, just
|
||||
# disable them.
|
||||
# disable them. BOLT is used for optimizing LLVM.
|
||||
hide_output \
|
||||
cmake ../llvm \
|
||||
-DCMAKE_C_COMPILER=/rustroot/bin/gcc \
|
||||
@@ -36,7 +36,7 @@ hide_output \
|
||||
-DLLVM_INCLUDE_BENCHMARKS=OFF \
|
||||
-DLLVM_INCLUDE_TESTS=OFF \
|
||||
-DLLVM_INCLUDE_EXAMPLES=OFF \
|
||||
-DLLVM_ENABLE_PROJECTS="clang;lld;compiler-rt" \
|
||||
-DLLVM_ENABLE_PROJECTS="clang;lld;compiler-rt;bolt" \
|
||||
-DC_INCLUDE_DIRS="$INC"
|
||||
|
||||
hide_output make -j$(nproc)
|
||||
|
||||
@@ -190,11 +190,40 @@ rm -r $RUSTC_PROFILE_DIRECTORY_ROOT
|
||||
# directories ourselves.
|
||||
rm -r $BUILD_ARTIFACTS/llvm $BUILD_ARTIFACTS/lld
|
||||
|
||||
# This produces the actual final set of artifacts, using both the LLVM and rustc
|
||||
# collected profiling data.
|
||||
$@ \
|
||||
--rust-profile-use=${RUSTC_PROFILE_MERGED_FILE} \
|
||||
--llvm-profile-use=${LLVM_PROFILE_MERGED_FILE}
|
||||
if isLinux; then
|
||||
# Gather BOLT profile (BOLT is currently only available on Linux)
|
||||
python3 ../x.py build --target=$PGO_HOST --host=$PGO_HOST \
|
||||
--stage 2 library/std \
|
||||
--llvm-profile-use=${LLVM_PROFILE_MERGED_FILE} \
|
||||
--llvm-bolt-profile-generate
|
||||
|
||||
BOLT_PROFILE_MERGED_FILE=/tmp/bolt.profdata
|
||||
|
||||
# Here we're profiling LLVM using the BOLT-instrumented libLLVM.so.
|
||||
gather_profiles "Check,Debug,Opt" "Full" \
|
||||
"syn-1.0.89,serde-1.0.136,ripgrep-13.0.0,regex-1.5.5,clap-3.1.6,hyper-0.14.18"
|
||||
|
||||
merge-fdata /tmp/prof.fdata* > ${BOLT_PROFILE_MERGED_FILE}
|
||||
|
||||
echo "BOLT statistics"
|
||||
du -sh /tmp/prof.fdata*
|
||||
du -sh ${BOLT_PROFILE_MERGED_FILE}
|
||||
echo "Profile file count"
|
||||
find /tmp/prof.fdata* -type f | wc -l
|
||||
|
||||
rm -r $BUILD_ARTIFACTS/llvm $BUILD_ARTIFACTS/lld
|
||||
|
||||
# This produces the actual final set of artifacts, using both the LLVM and rustc
|
||||
# collected profiling data.
|
||||
$@ \
|
||||
--rust-profile-use=${RUSTC_PROFILE_MERGED_FILE} \
|
||||
--llvm-profile-use=${LLVM_PROFILE_MERGED_FILE} \
|
||||
--llvm-bolt-profile-use=${BOLT_PROFILE_MERGED_FILE}
|
||||
else
|
||||
$@ \
|
||||
--rust-profile-use=${RUSTC_PROFILE_MERGED_FILE} \
|
||||
--llvm-profile-use=${LLVM_PROFILE_MERGED_FILE}
|
||||
fi
|
||||
|
||||
echo "Rustc binary size"
|
||||
ls -la ./build/$PGO_HOST/stage2/bin
|
||||
|
||||
Reference in New Issue
Block a user