# --- T2-COPYRIGHT-NOTE-BEGIN --- # T2 SDE: package/*/mesa/llvm-19-passmgr-amd.patch # Copyright (C) 2024 The T2 SDE Project # # This Copyright note is generated by scripts/Create-CopyPatch, # more information can be found in the files COPYING and README. # # This patch file is dual-licensed. It is available under the license the # patched project is licensed under, as long as it is an OpenSource license # as defined at http://www.opensource.org/ (e.g. BSD, X11) or under the terms # of the GNU General Public License version 2 as used by the T2 SDE. # --- T2-COPYRIGHT-NOTE-END --- commit 38e50221cd9f37877d258dca84b8d3f58dbdcb88 Author: Ganesh Belgur Ramachandra Date: Sun Aug 4 11:40:18 2024 -0500 amd,radeonsi: use new pass manager to handle midend optimizations Adds an optimizer structure that builds an optimization pipeline to run LLVM passes using the new pass manager. Reviewed-by: Marek Olšák Part-of: diff --git a/src/amd/llvm/ac_llvm_helper.cpp b/src/amd/llvm/ac_llvm_helper.cpp index af4a50f8409..429796f40ad 100644 --- a/src/amd/llvm/ac_llvm_helper.cpp +++ b/src/amd/llvm/ac_llvm_helper.cpp @@ -17,9 +17,14 @@ #include #include #include -#include +#include #include +#include #include +#include +#include +#include +#include #include "llvm/CodeGen/SelectionDAGNodes.h" #include @@ -234,6 +239,95 @@ struct raw_memory_ostream : public raw_pwrite_stream { } }; +/* The middle-end optimization passes are run using + * the LLVM's new pass manager infrastructure. + */ +struct ac_midend_optimizer +{ + TargetMachine *target_machine; + PassBuilder pass_builder; + TargetLibraryInfoImpl target_library_info; + + /* Should be declared in this order only, + * so that they are destroyed in the correct order + * due to inter-analysis-manager references. + */ + LoopAnalysisManager loop_am; + FunctionAnalysisManager function_am; + CGSCCAnalysisManager cgscc_am; + ModuleAnalysisManager module_am; + + /* Pass Managers */ + LoopPassManager loop_pm; + FunctionPassManager function_pm; + ModulePassManager module_pm; + + ac_midend_optimizer(TargetMachine *arg_target_machine, bool arg_check_ir) + : target_machine(arg_target_machine), + pass_builder(target_machine, PipelineTuningOptions(), {}), + target_library_info(Triple(target_machine->getTargetTriple())) + { + /* Build the pipeline and optimize. + * Any custom analyses should be registered + * before LLVM's default analysis sets. + */ + function_am.registerPass( + [&] { return TargetLibraryAnalysis(target_library_info); } + ); + + pass_builder.registerModuleAnalyses(module_am); + pass_builder.registerCGSCCAnalyses(cgscc_am); + pass_builder.registerFunctionAnalyses(function_am); + pass_builder.registerLoopAnalyses(loop_am); + pass_builder.crossRegisterProxies(loop_am, function_am, cgscc_am, module_am); + + if (arg_check_ir) + module_pm.addPass(VerifierPass()); + + /* Adding inliner pass to the module pass manager directly + * ensures that the pass is run on all functions first, which makes sure + * that the following passes are only run on the remaining non-inline + * function, so it removes useless work done on dead inline functions. + */ + module_pm.addPass(AlwaysInlinerPass()); + + /* The following set of passes run on an individual function/loop first + * before proceeding to the next. + */ +#if LLVM_VERSION_MAJOR >= 16 + function_pm.addPass(SROAPass(SROAOptions::ModifyCFG)); +#else + // Old version of the code + function_pm.addPass(SROAPass()); +#endif + + loop_pm.addPass(LICMPass(LICMOptions())); + function_pm.addPass(createFunctionToLoopPassAdaptor(std::move(loop_pm), true)); + function_pm.addPass(SimplifyCFGPass()); + function_pm.addPass(EarlyCSEPass(true)); + + module_pm.addPass(createModuleToFunctionPassAdaptor(std::move(function_pm))); + } + + void run(Module &module) + { + module_pm.run(module, module_am); + + /* After a run(), the results in the analyses managers + * aren't useful to optimize a subsequent LLVM module. + * If used, it can lead to unexpected crashes. + * Hence, the results in the analyses managers + * need to be invalidated and cleared before + * running optimizations on a new LLVM module. + */ + module_am.invalidate(module, PreservedAnalyses::none()); + module_am.clear(); + cgscc_am.clear(); + function_am.clear(); + loop_am.clear(); + } +}; + /* The LLVM compiler is represented as a pass manager containing passes for * optimizations, instruction selection, and code generation. */ @@ -277,41 +371,26 @@ bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module return true; } -LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info, - bool check_ir) +ac_midend_optimizer *ac_create_midend_optimizer(LLVMTargetMachineRef tm, + bool check_ir) { - LLVMPassManagerRef passmgr = LLVMCreatePassManager(); - if (!passmgr) - return NULL; - - if (target_library_info) - LLVMAddTargetLibraryInfo(target_library_info, passmgr); + TargetMachine *TM = reinterpret_cast(tm); + return new ac_midend_optimizer(TM, check_ir); +} - if (check_ir) - unwrap(passmgr)->add(createVerifierPass()); +void ac_destroy_midend_optimiser(ac_midend_optimizer *meo) +{ + delete meo; +} - unwrap(passmgr)->add(createAlwaysInlinerLegacyPass()); +bool ac_llvm_optimize_module(ac_midend_optimizer *meo, LLVMModuleRef module) +{ + if (!meo) + return false; - /* Normally, the pass manager runs all passes on one function before - * moving onto another. Adding a barrier no-op pass forces the pass - * manager to run the inliner on all functions first, which makes sure - * that the following passes are only run on the remaining non-inline - * function, so it removes useless work done on dead inline functions. - */ - unwrap(passmgr)->add(createBarrierNoopPass()); - - #if LLVM_VERSION_MAJOR >= 16 - unwrap(passmgr)->add(createSROAPass(true)); - #else - unwrap(passmgr)->add(createSROAPass()); - #endif - /* TODO: restore IPSCCP */ - unwrap(passmgr)->add(createLICMPass()); - unwrap(passmgr)->add(createCFGSimplificationPass()); - /* This is recommended by the instruction combining pass. */ - unwrap(passmgr)->add(createEarlyCSEPass(true)); - unwrap(passmgr)->add(createInstructionCombiningPass()); - return passmgr; + /* Runs all the middle-end optimizations, no code generation */ + meo->run(*unwrap(module)); + return true; } LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op, diff --git a/src/amd/llvm/ac_llvm_util.c b/src/amd/llvm/ac_llvm_util.c index 221cb99ee60..dddcfa32289 100644 --- a/src/amd/llvm/ac_llvm_util.c +++ b/src/amd/llvm/ac_llvm_util.c @@ -182,9 +182,9 @@ bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, enum radeon_family if (!compiler->target_library_info) goto fail; - compiler->passmgr = - ac_create_passmgr(compiler->target_library_info, tm_options & AC_TM_CHECK_IR); - if (!compiler->passmgr) + compiler->meo = + ac_create_midend_optimizer(compiler->tm, tm_options & AC_TM_CHECK_IR); + if (!compiler->meo) goto fail; return true; @@ -198,8 +198,10 @@ void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler) ac_destroy_llvm_passes(compiler->passes); ac_destroy_llvm_passes(compiler->low_opt_passes); - if (compiler->passmgr) - LLVMDisposePassManager(compiler->passmgr); + /* delete optimizer pass manager */ + if (compiler->meo) + ac_destroy_midend_optimiser(compiler->meo); + if (compiler->target_library_info) ac_dispose_target_library_info(compiler->target_library_info); if (compiler->low_opt_tm) diff --git a/src/amd/llvm/ac_llvm_util.h b/src/amd/llvm/ac_llvm_util.h index 29f4a6a9bfb..6311cd048d5 100644 --- a/src/amd/llvm/ac_llvm_util.h +++ b/src/amd/llvm/ac_llvm_util.h @@ -44,10 +44,10 @@ enum ac_float_mode /* Per-thread persistent LLVM objects. */ struct ac_llvm_compiler { LLVMTargetLibraryInfoRef target_library_info; - LLVMPassManagerRef passmgr; /* Default compiler. */ LLVMTargetMachineRef tm; + struct ac_midend_optimizer *meo; struct ac_compiler_passes *passes; /* Optional compiler for faster compilation with fewer optimizations. @@ -86,12 +86,15 @@ bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, enum radeon_family enum ac_target_machine_options tm_options); void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler); +struct ac_midend_optimizer *ac_create_midend_optimizer(LLVMTargetMachineRef tm, + bool check_ir); +void ac_destroy_midend_optimiser(struct ac_midend_optimizer *meo); +bool ac_llvm_optimize_module(struct ac_midend_optimizer *meo, LLVMModuleRef module); + struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm); void ac_destroy_llvm_passes(struct ac_compiler_passes *p); bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module, char **pelf_buffer, size_t *pelf_size); -LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info, - bool check_ir); static inline bool ac_has_vec3_support(enum amd_gfx_level chip, bool use_format) { diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 5caf1c025fa..91868b5bef6 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -193,10 +193,9 @@ radv_load_output(struct radv_shader_context *ctx, unsigned index, unsigned chan) } static void -ac_llvm_finalize_module(struct radv_shader_context *ctx, LLVMPassManagerRef passmgr) +ac_llvm_finalize_module(struct radv_shader_context *ctx, struct ac_midend_optimizer *meo) { - LLVMRunPassManager(passmgr, ctx->ac.module); - + ac_llvm_optimize_module(meo, ctx->ac.module); ac_llvm_context_dispose(&ctx->ac); } @@ -390,7 +389,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, const struct radv_nir fprintf(stderr, "\n"); } - ac_llvm_finalize_module(&ctx, ac_llvm->passmgr); + ac_llvm_finalize_module(&ctx, ac_llvm->meo); free(name); diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 1342d0e6425..84275bf1d0d 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -236,7 +236,7 @@ void si_llvm_optimize_module(struct si_shader_context *ctx) ac_dump_module(ctx->ac.module); /* Run the pass */ - LLVMRunPassManager(ctx->compiler->passmgr, ctx->ac.module); + ac_llvm_optimize_module(ctx->compiler->meo, ctx->ac.module); } void si_llvm_dispose(struct si_shader_context *ctx) commit 0a352a838a74d0627e76f5e6bfb5e1020cc89b42 Author: Ganesh Belgur Ramachandra Date: Sun Aug 4 12:32:06 2024 -0500 amd,radeonsi: reduce legacy::PassManager use to only run backend passes The legacy::PassManager is only required to run backend optimizations and for code generation. It should be deprecated when the new PM can handle code generation on its own. Reviewed-by: Marek Olšák Part-of: diff --git a/src/amd/llvm/ac_llvm_helper.cpp b/src/amd/llvm/ac_llvm_helper.cpp index 429796f40ad..8ea878943ba 100644 --- a/src/amd/llvm/ac_llvm_helper.cpp +++ b/src/amd/llvm/ac_llvm_helper.cpp @@ -328,48 +328,35 @@ struct ac_midend_optimizer } }; -/* The LLVM compiler is represented as a pass manager containing passes for - * optimizations, instruction selection, and code generation. +/* The backend passes for optimizations, instruction selection, + * and code generation in the LLVM compiler still requires the + * legacy::PassManager. The use of the legacy PM will be + * deprecated when the new PM can handle backend passes. */ -struct ac_compiler_passes { - raw_memory_ostream ostream; /* ELF shader binary stream */ - legacy::PassManager passmgr; /* list of passes */ -}; - -struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm) +struct ac_backend_optimizer { - struct ac_compiler_passes *p = new ac_compiler_passes(); - if (!p) - return NULL; - - TargetMachine *TM = reinterpret_cast(tm); + raw_memory_ostream ostream; /* ELF shader binary stream */ + legacy::PassManager backend_pass_manager; /* for codegen only */ - if (TM->addPassesToEmitFile(p->passmgr, p->ostream, nullptr, + ac_backend_optimizer(TargetMachine *arg_target_machine) + { + /* add backend passes */ + if (arg_target_machine->addPassesToEmitFile(backend_pass_manager, ostream, nullptr, #if LLVM_VERSION_MAJOR >= 18 - CodeGenFileType::ObjectFile)) { + CodeGenFileType::ObjectFile)) { #else - CGFT_ObjectFile)) { + CGFT_ObjectFile)) { #endif - fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n"); - delete p; - return NULL; + fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n"); + } } - return p; -} - -void ac_destroy_llvm_passes(struct ac_compiler_passes *p) -{ - delete p; -} -/* This returns false on failure. */ -bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module, - char **pelf_buffer, size_t *pelf_size) -{ - p->passmgr.run(*unwrap(module)); - p->ostream.take(*pelf_buffer, *pelf_size); - return true; -} + void run(Module &module, char *&out_buffer, size_t &out_size) + { + backend_pass_manager.run(module); + ostream.take(out_buffer, out_size); + } +}; ac_midend_optimizer *ac_create_midend_optimizer(LLVMTargetMachineRef tm, bool check_ir) @@ -393,6 +380,28 @@ bool ac_llvm_optimize_module(ac_midend_optimizer *meo, LLVMModuleRef module) return true; } +ac_backend_optimizer *ac_create_backend_optimizer(LLVMTargetMachineRef tm) +{ + TargetMachine *TM = reinterpret_cast(tm); + return new ac_backend_optimizer(TM); +} + +void ac_destroy_backend_optimizer(ac_backend_optimizer *beo) +{ + delete beo; +} + +bool ac_compile_module_to_elf(ac_backend_optimizer *beo, LLVMModuleRef module, + char **pelf_buffer, size_t *pelf_size) +{ + if (!beo) + return false; + + /* Runs all backend optimizations and code generation */ + beo->run(*unwrap(module), *pelf_buffer, *pelf_size); + return true; +} + LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op, LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope) { diff --git a/src/amd/llvm/ac_llvm_util.c b/src/amd/llvm/ac_llvm_util.c index dddcfa32289..c8d5e739726 100644 --- a/src/amd/llvm/ac_llvm_util.c +++ b/src/amd/llvm/ac_llvm_util.c @@ -195,8 +195,9 @@ fail: void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler) { - ac_destroy_llvm_passes(compiler->passes); - ac_destroy_llvm_passes(compiler->low_opt_passes); + /* delete the codegen pass managers */ + ac_destroy_backend_optimizer(compiler->beo); + ac_destroy_backend_optimizer(compiler->low_opt_beo); /* delete optimizer pass manager */ if (compiler->meo) diff --git a/src/amd/llvm/ac_llvm_util.h b/src/amd/llvm/ac_llvm_util.h index 6311cd048d5..0097601c9fc 100644 --- a/src/amd/llvm/ac_llvm_util.h +++ b/src/amd/llvm/ac_llvm_util.h @@ -48,13 +48,13 @@ struct ac_llvm_compiler { /* Default compiler. */ LLVMTargetMachineRef tm; struct ac_midend_optimizer *meo; - struct ac_compiler_passes *passes; + struct ac_backend_optimizer *beo; /* Optional compiler for faster compilation with fewer optimizations. * LLVM modules can be created with "tm" too. There is no difference. */ LLVMTargetMachineRef low_opt_tm; /* uses -O1 instead of -O2 */ - struct ac_compiler_passes *low_opt_passes; + struct ac_backend_optimizer *low_opt_beo; }; LLVMTargetRef ac_get_llvm_target(const char *triple); @@ -91,9 +91,9 @@ struct ac_midend_optimizer *ac_create_midend_optimizer(LLVMTargetMachineRef tm, void ac_destroy_midend_optimiser(struct ac_midend_optimizer *meo); bool ac_llvm_optimize_module(struct ac_midend_optimizer *meo, LLVMModuleRef module); -struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm); -void ac_destroy_llvm_passes(struct ac_compiler_passes *p); -bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module, +struct ac_backend_optimizer *ac_create_backend_optimizer(LLVMTargetMachineRef tm); +void ac_destroy_backend_optimizer(struct ac_backend_optimizer *beo); +bool ac_compile_module_to_elf(struct ac_backend_optimizer *beo, LLVMModuleRef module, char **pelf_buffer, size_t *pelf_size); static inline bool ac_has_vec3_support(enum amd_gfx_level chip, bool use_format) diff --git a/src/amd/vulkan/radv_llvm_helper.cpp b/src/amd/vulkan/radv_llvm_helper.cpp index 53a4b57cca5..615d90a3b58 100644 --- a/src/amd/vulkan/radv_llvm_helper.cpp +++ b/src/amd/vulkan/radv_llvm_helper.cpp @@ -11,7 +11,7 @@ class radv_llvm_per_thread_info { public: radv_llvm_per_thread_info(enum radeon_family arg_family, enum ac_target_machine_options arg_tm_options, unsigned arg_wave_size) - : family(arg_family), tm_options(arg_tm_options), wave_size(arg_wave_size), passes(NULL) + : family(arg_family), tm_options(arg_tm_options), wave_size(arg_wave_size), beo(NULL) { } @@ -25,8 +25,8 @@ public: if (!ac_init_llvm_compiler(&llvm_info, family, tm_options)) return false; - passes = ac_create_llvm_passes(llvm_info.tm); - if (!passes) + beo = ac_create_backend_optimizer(llvm_info.tm); + if (!beo) return false; return true; @@ -34,7 +34,7 @@ public: bool compile_to_memory_buffer(LLVMModuleRef module, char **pelf_buffer, size_t *pelf_size) { - return ac_compile_module_to_elf(passes, module, pelf_buffer, pelf_size); + return ac_compile_module_to_elf(beo, module, pelf_buffer, pelf_size); } bool is_same(enum radeon_family arg_family, enum ac_target_machine_options arg_tm_options, unsigned arg_wave_size) @@ -49,7 +49,7 @@ private: enum radeon_family family; enum ac_target_machine_options tm_options; unsigned wave_size; - struct ac_compiler_passes *passes; + struct ac_backend_optimizer *beo; }; /* we have to store a linked list per thread due to the possibility of multiple gpus being required */ @@ -68,9 +68,9 @@ radv_compile_to_elf(struct ac_llvm_compiler *info, LLVMModuleRef module, char ** } if (!thread_info) { - struct ac_compiler_passes *passes = ac_create_llvm_passes(info->tm); - bool ret = ac_compile_module_to_elf(passes, module, pelf_buffer, pelf_size); - ac_destroy_llvm_passes(passes); + struct ac_backend_optimizer *beo = ac_create_backend_optimizer(info->tm); + bool ret = ac_compile_module_to_elf(beo, module, pelf_buffer, pelf_size); + ac_destroy_backend_optimizer(beo); return ret; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 9fe1c71a21a..318e7b1894e 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -154,9 +154,9 @@ struct ac_llvm_compiler *si_create_llvm_compiler(struct si_screen *sscreen) if (!ac_init_llvm_compiler(compiler, sscreen->info.family, tm_options)) return NULL; - compiler->passes = ac_create_llvm_passes(compiler->tm); + compiler->beo = ac_create_backend_optimizer(compiler->tm); if (compiler->low_opt_tm) - compiler->low_opt_passes = ac_create_llvm_passes(compiler->low_opt_tm); + compiler->low_opt_beo = ac_create_backend_optimizer(compiler->low_opt_tm); return compiler; #else diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 84275bf1d0d..966cf753c1c 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -73,15 +73,15 @@ bool si_compile_llvm(struct si_screen *sscreen, struct si_shader_binary *binary, } if (!si_replace_shader(count, binary)) { - struct ac_compiler_passes *passes = compiler->passes; + struct ac_backend_optimizer *beo = compiler->beo; - if (less_optimized && compiler->low_opt_passes) - passes = compiler->low_opt_passes; + if (less_optimized && compiler->low_opt_beo) + beo = compiler->low_opt_beo; struct si_llvm_diagnostics diag = {debug}; LLVMContextSetDiagnosticHandler(ac->context, si_diagnostic_handler, &diag); - if (!ac_compile_module_to_elf(passes, ac->module, (char **)&binary->code_buffer, + if (!ac_compile_module_to_elf(beo, ac->module, (char **)&binary->code_buffer, &binary->code_size)) diag.retval = 1; @@ -794,7 +794,7 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade static bool si_should_optimize_less(struct ac_llvm_compiler *compiler, struct si_shader_selector *sel) { - if (!compiler->low_opt_passes) + if (!compiler->low_opt_beo) return false; /* Assume a slow CPU. */