Skip to content

Commit 831db11

Browse files
committed
Auto merge of #146232 - bjorn3:lto_allocator_shim, r=lqd
Make the allocator shim participate in LTO again. The shim not participating in LTO is likely the cause of the perf regression in #145955; it also caused some functional regressions. Fixes #146235. Fixes #146239.
2 parents 0d0f4ea + 3851246 commit 831db11

File tree

7 files changed

+78
-29
lines changed

7 files changed

+78
-29
lines changed

compiler/rustc_codegen_cranelift/src/driver/aot.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ use cranelift_object::{ObjectBuilder, ObjectModule};
1212
use rustc_codegen_ssa::assert_module_sources::CguReuse;
1313
use rustc_codegen_ssa::back::link::ensure_removed;
1414
use rustc_codegen_ssa::base::determine_cgu_reuse;
15-
use rustc_codegen_ssa::{CodegenResults, CompiledModule, CrateInfo, errors as ssa_errors};
15+
use rustc_codegen_ssa::{
16+
CodegenResults, CompiledModule, CrateInfo, ModuleKind, errors as ssa_errors,
17+
};
1618
use rustc_data_structures::profiling::SelfProfilerRef;
1719
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
1820
use rustc_data_structures::sync::{IntoDynSyncSend, par_map};
@@ -361,6 +363,7 @@ fn emit_cgu(
361363
invocation_temp,
362364
prof,
363365
product.object,
366+
ModuleKind::Regular,
364367
name.clone(),
365368
producer,
366369
)?;
@@ -369,6 +372,7 @@ fn emit_cgu(
369372
module_regular,
370373
module_global_asm: global_asm_object_file.map(|global_asm_object_file| CompiledModule {
371374
name: format!("{name}.asm"),
375+
kind: ModuleKind::Regular,
372376
object: Some(global_asm_object_file),
373377
dwarf_object: None,
374378
bytecode: None,
@@ -385,6 +389,7 @@ fn emit_module(
385389
invocation_temp: Option<&str>,
386390
prof: &SelfProfilerRef,
387391
mut object: cranelift_object::object::write::Object<'_>,
392+
kind: ModuleKind,
388393
name: String,
389394
producer_str: &str,
390395
) -> Result<CompiledModule, String> {
@@ -425,6 +430,7 @@ fn emit_module(
425430

426431
Ok(CompiledModule {
427432
name,
433+
kind,
428434
object: Some(tmp_file),
429435
dwarf_object: None,
430436
bytecode: None,
@@ -479,6 +485,7 @@ fn reuse_workproduct_for_cgu(
479485
Ok(ModuleCodegenResult {
480486
module_regular: CompiledModule {
481487
name: cgu.name().to_string(),
488+
kind: ModuleKind::Regular,
482489
object: Some(obj_out_regular),
483490
dwarf_object: None,
484491
bytecode: None,
@@ -488,6 +495,7 @@ fn reuse_workproduct_for_cgu(
488495
},
489496
module_global_asm: source_file_global_asm.map(|source_file| CompiledModule {
490497
name: cgu.name().to_string(),
498+
kind: ModuleKind::Regular,
491499
object: Some(obj_out_global_asm),
492500
dwarf_object: None,
493501
bytecode: None,
@@ -643,6 +651,7 @@ fn emit_allocator_module(tcx: TyCtxt<'_>) -> Option<CompiledModule> {
643651
tcx.sess.invocation_temp.as_deref(),
644652
&tcx.sess.prof,
645653
product.object,
654+
ModuleKind::Allocator,
646655
"allocator_shim".to_owned(),
647656
&crate::debuginfo::producer(tcx.sess),
648657
) {

compiler/rustc_codegen_llvm/src/back/lto.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use object::{Object, ObjectSection};
1111
use rustc_codegen_ssa::back::lto::{SerializedModule, ThinModule, ThinShared};
1212
use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput};
1313
use rustc_codegen_ssa::traits::*;
14-
use rustc_codegen_ssa::{ModuleCodegen, looks_like_rust_object_file};
14+
use rustc_codegen_ssa::{ModuleCodegen, ModuleKind, looks_like_rust_object_file};
1515
use rustc_data_structures::fx::FxHashMap;
1616
use rustc_data_structures::memmap::Mmap;
1717
use rustc_errors::DiagCtxtHandle;
@@ -225,9 +225,15 @@ fn fat_lto(
225225
// All the other modules will be serialized and reparsed into the new
226226
// context, so this hopefully avoids serializing and parsing the largest
227227
// codegen unit.
228+
//
229+
// Additionally use a regular module as the base here to ensure that various
230+
// file copy operations in the backend work correctly. The only other kind
231+
// of module here should be an allocator one, and if your crate is smaller
232+
// than the allocator module then the size doesn't really matter anyway.
228233
let costliest_module = in_memory
229234
.iter()
230235
.enumerate()
236+
.filter(|&(_, module)| module.kind == ModuleKind::Regular)
231237
.map(|(i, module)| {
232238
let cost = unsafe { llvm::LLVMRustModuleCost(module.module_llvm.llmod()) };
233239
(cost, i)

compiler/rustc_codegen_ssa/src/back/lto.rs

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
use std::ffi::CString;
22
use std::sync::Arc;
33

4-
use rustc_ast::expand::allocator::AllocatorKind;
54
use rustc_data_structures::memmap::Mmap;
65
use rustc_hir::def_id::{CrateNum, LOCAL_CRATE};
76
use rustc_middle::middle::exported_symbols::{ExportedSymbol, SymbolExportInfo, SymbolExportLevel};
@@ -96,19 +95,6 @@ pub(super) fn exported_symbols_for_lto(
9695
.filter_map(|&(s, info): &(ExportedSymbol<'_>, SymbolExportInfo)| {
9796
if info.level.is_below_threshold(export_threshold) || info.used {
9897
Some(symbol_name_for_instance_in_crate(tcx, s, cnum))
99-
} else if export_threshold == SymbolExportLevel::C
100-
&& info.rustc_std_internal_symbol
101-
&& let Some(AllocatorKind::Default) = allocator_kind_for_codegen(tcx)
102-
{
103-
// Export the __rdl_* exports for usage by the allocator shim when not using
104-
// #[global_allocator]. Most of the conditions above are only used to avoid
105-
// unnecessary expensive symbol_name_for_instance_in_crate calls.
106-
let sym = symbol_name_for_instance_in_crate(tcx, s, cnum);
107-
if sym.contains("__rdl_") || sym.contains("__rg_oom") {
108-
Some(sym)
109-
} else {
110-
None
111-
}
11298
} else {
11399
None
114100
}

compiler/rustc_codegen_ssa/src/back/write.rs

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,7 @@ pub struct CodegenContext<B: WriteBackendMethods> {
334334
pub output_filenames: Arc<OutputFilenames>,
335335
pub invocation_temp: Option<String>,
336336
pub module_config: Arc<ModuleConfig>,
337+
pub allocator_config: Arc<ModuleConfig>,
337338
pub tm_factory: TargetMachineFactoryFn<B>,
338339
pub msvc_imps_needed: bool,
339340
pub is_pe_coff: bool,
@@ -489,7 +490,7 @@ fn copy_all_cgu_workproducts_to_incr_comp_cache_dir(
489490

490491
let _timer = sess.timer("copy_all_cgu_workproducts_to_incr_comp_cache_dir");
491492

492-
for module in &compiled_modules.modules {
493+
for module in compiled_modules.modules.iter().filter(|m| m.kind == ModuleKind::Regular) {
493494
let mut files = Vec::new();
494495
if let Some(object_file_path) = &module.object {
495496
files.push((OutputType::Object.extension(), object_file_path.as_path()));
@@ -794,12 +795,19 @@ pub(crate) fn compute_per_cgu_lto_type(
794795
sess_lto: &Lto,
795796
opts: &config::Options,
796797
sess_crate_types: &[CrateType],
798+
module_kind: ModuleKind,
797799
) -> ComputedLtoType {
798800
// If the linker does LTO, we don't have to do it. Note that we
799801
// keep doing full LTO, if it is requested, as not to break the
800802
// assumption that the output will be a single module.
801803
let linker_does_lto = opts.cg.linker_plugin_lto.enabled();
802804

805+
// When we're automatically doing ThinLTO for multi-codegen-unit
806+
// builds we don't actually want to LTO the allocator module if
807+
// it shows up. This is due to various linker shenanigans that
808+
// we'll encounter later.
809+
let is_allocator = module_kind == ModuleKind::Allocator;
810+
803811
// We ignore a request for full crate graph LTO if the crate type
804812
// is only an rlib, as there is no full crate graph to process,
805813
// that'll happen later.
@@ -811,7 +819,7 @@ pub(crate) fn compute_per_cgu_lto_type(
811819
let is_rlib = matches!(sess_crate_types, [CrateType::Rlib]);
812820

813821
match sess_lto {
814-
Lto::ThinLocal if !linker_does_lto => ComputedLtoType::Thin,
822+
Lto::ThinLocal if !linker_does_lto && !is_allocator => ComputedLtoType::Thin,
815823
Lto::Thin if !linker_does_lto && !is_rlib => ComputedLtoType::Thin,
816824
Lto::Fat if !is_rlib => ComputedLtoType::Fat,
817825
_ => ComputedLtoType::No,
@@ -825,18 +833,23 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>(
825833
let dcx = cgcx.create_dcx();
826834
let dcx = dcx.handle();
827835

828-
B::optimize(cgcx, dcx, &mut module, &cgcx.module_config);
836+
let module_config = match module.kind {
837+
ModuleKind::Regular => &cgcx.module_config,
838+
ModuleKind::Allocator => &cgcx.allocator_config,
839+
};
840+
841+
B::optimize(cgcx, dcx, &mut module, module_config);
829842

830843
// After we've done the initial round of optimizations we need to
831844
// decide whether to synchronously codegen this module or ship it
832845
// back to the coordinator thread for further LTO processing (which
833846
// has to wait for all the initial modules to be optimized).
834847

835-
let lto_type = compute_per_cgu_lto_type(&cgcx.lto, &cgcx.opts, &cgcx.crate_types);
848+
let lto_type = compute_per_cgu_lto_type(&cgcx.lto, &cgcx.opts, &cgcx.crate_types, module.kind);
836849

837850
// If we're doing some form of incremental LTO then we need to be sure to
838851
// save our module to disk first.
839-
let bitcode = if cgcx.module_config.emit_pre_lto_bc {
852+
let bitcode = if module_config.emit_pre_lto_bc {
840853
let filename = pre_lto_bitcode_filename(&module.name);
841854
cgcx.incr_comp_session_dir.as_ref().map(|path| path.join(&filename))
842855
} else {
@@ -845,7 +858,7 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>(
845858

846859
match lto_type {
847860
ComputedLtoType::No => {
848-
let module = B::codegen(cgcx, module, &cgcx.module_config);
861+
let module = B::codegen(cgcx, module, module_config);
849862
WorkItemResult::Finished(module)
850863
}
851864
ComputedLtoType::Thin => {
@@ -947,6 +960,7 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
947960

948961
WorkItemResult::Finished(CompiledModule {
949962
links_from_incr_cache,
963+
kind: ModuleKind::Regular,
950964
name: module.name,
951965
object,
952966
dwarf_object,
@@ -1133,6 +1147,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
11331147
diag_emitter: shared_emitter.clone(),
11341148
output_filenames: Arc::clone(tcx.output_filenames(())),
11351149
module_config: regular_config,
1150+
allocator_config,
11361151
tm_factory: backend.target_machine_factory(tcx.sess, ol, backend_features),
11371152
msvc_imps_needed: msvc_imps_needed(tcx),
11381153
is_pe_coff: tcx.sess.target.is_like_windows,
@@ -1147,11 +1162,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
11471162
invocation_temp: sess.invocation_temp.clone(),
11481163
};
11491164

1150-
let compiled_allocator_module = allocator_module.map(|mut allocator_module| {
1151-
B::optimize(&cgcx, tcx.sess.dcx(), &mut allocator_module, &allocator_config);
1152-
B::codegen(&cgcx, allocator_module, &allocator_config)
1153-
});
1154-
11551165
// This is the "main loop" of parallel work happening for parallel codegen.
11561166
// It's here that we manage parallelism, schedule work, and work with
11571167
// messages coming from clients.
@@ -1331,6 +1341,17 @@ fn start_executing_work<B: ExtraBackendMethods>(
13311341

13321342
let mut llvm_start_time: Option<VerboseTimingGuard<'_>> = None;
13331343

1344+
let compiled_allocator_module = allocator_module.and_then(|allocator_module| {
1345+
match execute_optimize_work_item(&cgcx, allocator_module) {
1346+
WorkItemResult::Finished(compiled_module) => return Some(compiled_module),
1347+
WorkItemResult::NeedsFatLto(fat_lto_input) => needs_fat_lto.push(fat_lto_input),
1348+
WorkItemResult::NeedsThinLto(name, thin_buffer) => {
1349+
needs_thin_lto.push((name, thin_buffer))
1350+
}
1351+
}
1352+
None
1353+
});
1354+
13341355
// Run the message loop while there's still anything that needs message
13351356
// processing. Note that as soon as codegen is aborted we simply want to
13361357
// wait for all existing work to finish, so many of the conditions here

compiler/rustc_codegen_ssa/src/base.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,9 @@ use crate::meth::load_vtable;
4646
use crate::mir::operand::OperandValue;
4747
use crate::mir::place::PlaceRef;
4848
use crate::traits::*;
49-
use crate::{CachedModuleCodegen, CodegenLintLevels, CrateInfo, ModuleCodegen, errors, meth, mir};
49+
use crate::{
50+
CachedModuleCodegen, CodegenLintLevels, CrateInfo, ModuleCodegen, ModuleKind, errors, meth, mir,
51+
};
5052

5153
pub(crate) fn bin_op_to_icmp_predicate(op: BinOp, signed: bool) -> IntPredicate {
5254
match (op, signed) {
@@ -1124,7 +1126,12 @@ pub fn determine_cgu_reuse<'tcx>(tcx: TyCtxt<'tcx>, cgu: &CodegenUnit<'tcx>) ->
11241126
// We can re-use either the pre- or the post-thinlto state. If no LTO is
11251127
// being performed then we can use post-LTO artifacts, otherwise we must
11261128
// reuse pre-LTO artifacts
1127-
match compute_per_cgu_lto_type(&tcx.sess.lto(), &tcx.sess.opts, tcx.crate_types()) {
1129+
match compute_per_cgu_lto_type(
1130+
&tcx.sess.lto(),
1131+
&tcx.sess.opts,
1132+
tcx.crate_types(),
1133+
ModuleKind::Regular,
1134+
) {
11281135
ComputedLtoType::No => CguReuse::PostLto,
11291136
_ => CguReuse::PreLto,
11301137
}

compiler/rustc_codegen_ssa/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ impl<M> ModuleCodegen<M> {
120120

121121
CompiledModule {
122122
name: self.name,
123+
kind: self.kind,
123124
object,
124125
dwarf_object,
125126
bytecode,
@@ -133,6 +134,7 @@ impl<M> ModuleCodegen<M> {
133134
#[derive(Debug, Encodable, Decodable)]
134135
pub struct CompiledModule {
135136
pub name: String,
137+
pub kind: ModuleKind,
136138
pub object: Option<PathBuf>,
137139
pub dwarf_object: Option<PathBuf>,
138140
pub bytecode: Option<PathBuf>,

tests/ui/lto/lto-global-allocator.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
//@ compile-flags: --crate-type cdylib -C lto
2+
//@ build-pass
3+
//@ no-prefer-dynamic
4+
5+
use std::alloc::{GlobalAlloc, Layout};
6+
7+
struct MyAllocator;
8+
9+
unsafe impl GlobalAlloc for MyAllocator {
10+
unsafe fn alloc(&self, _layout: Layout) -> *mut u8 {
11+
todo!()
12+
}
13+
14+
unsafe fn dealloc(&self, _ptr: *mut u8, _layout: Layout) {}
15+
}
16+
17+
#[global_allocator]
18+
static GLOBAL: MyAllocator = MyAllocator;

0 commit comments

Comments
 (0)