LLVM 23.0.0git
PassBuilderPipelines.cpp
Go to the documentation of this file.
1//===- Construction of pass pipelines -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// This file provides the implementation of the PassBuilder based on our
11/// static pass registry as well as related functionality. It also provides
12/// helpers to aid in analyzing, debugging, and testing passes and pass
13/// pipelines.
14///
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/PassManager.h"
30#include "llvm/Pass.h"
157
158using namespace llvm;
159
160namespace llvm {
161
163 "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
164 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
166 "Heuristics-based inliner version"),
168 "Use development mode (runtime-loadable model)"),
170 "Use release mode (AOT-compiled model)")));
171
172/// Flag to enable inline deferral during PGO.
173static cl::opt<bool>
174 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
176 cl::desc("Enable inline deferral during PGO"));
177
/// Boolean command-line option `enable-module-inliner`. It is declared with
/// cl::Hidden and initialized to false.
178static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
179 cl::init(false), cl::Hidden,
180 cl::desc("Enable module inliner"));
181
183 "mandatory-inlining-first", cl::init(false), cl::Hidden,
184 cl::desc("Perform mandatory inlinings module-wide, before performing "
185 "inlining"));
186
188 "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
189 cl::desc("Eagerly invalidate more analyses in default pipelines"));
190
192 "enable-merge-functions", cl::init(false), cl::Hidden,
193 cl::desc("Enable function merging as part of the optimization pipeline"));
194
196 "enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden,
197 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
198
/// Boolean command-line option `opt-pipeline-trigger-crash`. It is declared
/// with cl::Hidden and initialized to false.
/// NOTE(review): presumably a testing aid; its use site is not part of this
/// listing, so confirm there.
199static cl::opt<bool>
200 TriggerCrash("opt-pipeline-trigger-crash", cl::init(false), cl::Hidden,
201 cl::desc("Trigger crash in optimization pipeline"));
202
204 "enable-global-analyses", cl::init(true), cl::Hidden,
205 cl::desc("Enable inter-procedural analyses"));
206
/// Boolean command-line option `enable-partial-inlining`. It is declared with
/// cl::Hidden and initialized to false.
207static cl::opt<bool> RunPartialInlining("enable-partial-inlining",
208 cl::init(false), cl::Hidden,
209 cl::desc("Run Partial inlining pass"));
210
212 "extra-vectorizer-passes", cl::init(false), cl::Hidden,
213 cl::desc("Run cleanup optimization passes after vectorization"));
214
/// Boolean command-line option `enable-newgvn`, declared with cl::Hidden and
/// initialized to false. It is tested later in this file when redundancy
/// elimination is scheduled: if set, NewGVNPass is added in place of GVNPass.
215static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
216 cl::desc("Run the NewGVN pass"));
217
/// Boolean command-line option `enable-loopinterchange`. It is declared with
/// cl::Hidden and initialized to true.
218static cl::opt<bool>
219 EnableLoopInterchange("enable-loopinterchange", cl::init(true), cl::Hidden,
220 cl::desc("Enable the LoopInterchange Pass"));
221
/// Boolean command-line option `enable-unroll-and-jam`. It is declared with
/// cl::Hidden and initialized to false.
222static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
223 cl::init(false), cl::Hidden,
224 cl::desc("Enable Unroll And Jam Pass"));
225
226static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
228 cl::desc("Enable the LoopFlatten Pass"));
229
/// Boolean command-line option `enable-instrumentor`. It is declared with
/// cl::Hidden and initialized to false.
230static cl::opt<bool>
231 EnableInstrumentor("enable-instrumentor", cl::init(false), cl::Hidden,
232 cl::desc("Enable the Instrumentor Pass"));
233
/// Boolean command-line option `enable-dfa-jump-thread`. It is declared with
/// cl::Hidden and initialized to false.
234static cl::opt<bool>
235 EnableDFAJumpThreading("enable-dfa-jump-thread",
236 cl::desc("Enable DFA jump threading"),
237 cl::init(false), cl::Hidden);
238
/// Boolean command-line option `hot-cold-split`. Unlike most options declared
/// in this file, no cl::init or cl::Hidden modifier is passed here.
239static cl::opt<bool>
240 EnableHotColdSplit("hot-cold-split",
241 cl::desc("Enable hot-cold splitting pass"));
242
243static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false),
245 cl::desc("Enable ir outliner pass"));
246
/// Boolean command-line option `disable-preinline`. It is declared with
/// cl::Hidden and initialized to false.
247static cl::opt<bool>
248 DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden,
249 cl::desc("Disable pre-instrumentation inliner"));
250
252 "preinline-threshold", cl::Hidden, cl::init(75),
253 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
254 "(default = 75)"));
255
/// Boolean command-line option `enable-gvn-hoist`. No cl::init or cl::Hidden
/// modifier is passed; the description string states the default is off.
/// It is tested later in this file: when set, GVNHoistPass is added to the
/// function pass manager being built.
256static cl::opt<bool>
257 EnableGVNHoist("enable-gvn-hoist",
258 cl::desc("Enable the GVN hoisting pass (default = off)"));
259
/// Boolean command-line option `enable-gvn-sink`. No cl::init or cl::Hidden
/// modifier is passed; the description string states the default is off.
/// It is tested later in this file: when set, GVNSinkPass is added to the
/// function pass manager being built, followed by a SimplifyCFGPass run.
260static cl::opt<bool>
261 EnableGVNSink("enable-gvn-sink",
262 cl::desc("Enable the GVN sinking pass (default = off)"));
263
265 "enable-jump-table-to-switch", cl::init(true),
266 cl::desc("Enable JumpTableToSwitch pass (default = true)"));
267
268// This option is used in simplifying testing SampleFDO optimizations for
269// profile loading.
// NOTE(review): the comment above talks about SampleFDO profile loading,
// while the option declared below is described as enabling control height
// reduction (CHR). Confirm the comment is attached to the intended option.
// The option itself is `enable-chr`, declared with cl::Hidden and initialized
// to true.
270static cl::opt<bool>
271 EnableCHR("enable-chr", cl::init(true), cl::Hidden,
272 cl::desc("Enable control height reduction optimization (CHR)"));
273
275 "flattened-profile-used", cl::init(false), cl::Hidden,
276 cl::desc("Indicate the sample profile being used is flattened, i.e., "
277 "no inline hierarchy exists in the profile"));
278
/// Boolean command-line option `enable-matrix`. It is declared with
/// cl::Hidden and initialized to false.
279static cl::opt<bool>
280 EnableMatrix("enable-matrix", cl::init(false), cl::Hidden,
281 cl::desc("Enable lowering of the matrix intrinsics"));
282
284 "enable-mergeicmps", cl::init(true), cl::Hidden,
285 cl::desc("Enable MergeICmps pass in the optimization pipeline"));
286
288 "enable-constraint-elimination", cl::init(true), cl::Hidden,
289 cl::desc(
290 "Enable pass to eliminate conditions based on linear constraints"));
291
293 "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE),
294 cl::desc("Enable the attributor inter-procedural deduction pass"),
296 "enable all full attributor runs"),
298 "enable all attributor-light runs"),
300 "enable module-wide attributor runs"),
302 "enable module-wide attributor-light runs"),
304 "enable call graph SCC attributor runs"),
306 "enable call graph SCC attributor-light runs"),
307 clEnumValN(AttributorRunOption::NONE, "none",
308 "disable attributor runs")));
309
311 "enable-sampled-instrumentation", cl::init(false), cl::Hidden,
312 cl::desc("Enable profile instrumentation sampling (default = off)"));
314 "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
315 cl::desc("Enable the experimental Loop Versioning LICM pass"));
316
318 "instrument-cold-function-only-path", cl::init(""),
319 cl::desc("File path for cold function only instrumentation(requires use "
320 "with --pgo-instrument-cold-function-only)"),
321 cl::Hidden);
322
323// TODO: There is a similar flag in the WPD pass; we should consolidate them by
324// parsing the option only once in PassBuilder and sharing it across both places.
326 "enable-devirtualize-speculatively",
327 cl::desc("Enable speculative devirtualization optimization"),
328 cl::init(false));
329
332
334} // namespace llvm
335
353
354namespace llvm {
356} // namespace llvm
357
359 OptimizationLevel Level) {
360 for (auto &C : PeepholeEPCallbacks)
361 C(FPM, Level);
362}
365 for (auto &C : LateLoopOptimizationsEPCallbacks)
366 C(LPM, Level);
367}
369 OptimizationLevel Level) {
370 for (auto &C : LoopOptimizerEndEPCallbacks)
371 C(LPM, Level);
372}
375 for (auto &C : ScalarOptimizerLateEPCallbacks)
376 C(FPM, Level);
377}
379 OptimizationLevel Level) {
380 for (auto &C : CGSCCOptimizerLateEPCallbacks)
381 C(CGPM, Level);
382}
384 OptimizationLevel Level) {
385 for (auto &C : VectorizerStartEPCallbacks)
386 C(FPM, Level);
387}
389 OptimizationLevel Level) {
390 for (auto &C : VectorizerEndEPCallbacks)
391 C(FPM, Level);
392}
394 OptimizationLevel Level,
396 for (auto &C : OptimizerEarlyEPCallbacks)
397 C(MPM, Level, Phase);
398}
400 OptimizationLevel Level,
402 for (auto &C : OptimizerLastEPCallbacks)
403 C(MPM, Level, Phase);
404}
407 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
408 C(MPM, Level);
409}
412 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
413 C(MPM, Level);
414}
416 OptimizationLevel Level) {
417 for (auto &C : PipelineStartEPCallbacks)
418 C(MPM, Level);
419}
422 for (auto &C : PipelineEarlySimplificationEPCallbacks)
423 C(MPM, Level, Phase);
424}
425
426// Get IR stats with InstCount before/after the optimization pipeline
428 bool IsPreOptimization) {
429 if (AreStatisticsEnabled()) {
430 MPM.addPass(
433 FunctionPropertiesStatisticsPass(IsPreOptimization)));
434 }
435}
436
437// Helper to add AnnotationRemarksPass.
441
442// Helper to check if the current compilation phase is preparing for LTO
447
448// Helper to check if the current compilation phase is preparing for FullLTO
449[[maybe_unused]] static bool isFullLTOPreLink(ThinOrFullLTOPhase Phase) {
451}
452
453// Helper to check if the current compilation phase is preparing for ThinLTO
457
458// Helper to check if the current compilation phase is LTO backend
463
464// Helper to check if the current compilation phase is FullLTO backend
468
469// Helper to check if the current compilation phase is ThinLTO backend
473
474// Helper to wrap conditionally Coro passes.
476 // TODO: Skip passes according to Phase.
477 ModulePassManager CoroPM;
478 CoroPM.addPass(CoroEarlyPass());
479 CGSCCPassManager CGPM;
480 CGPM.addPass(CoroSplitPass());
481 CoroPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
482 CoroPM.addPass(CoroCleanupPass());
483 CoroPM.addPass(GlobalDCEPass());
484 return CoroConditionalWrapper(std::move(CoroPM));
485}
486
487// TODO: Investigate the cost/benefit of tail call elimination on debugging.
489PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
491
493
495 FPM.addPass(CountVisitsPass());
496
497 // Form SSA out of local memory accesses after breaking apart aggregates into
498 // scalars.
499 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
500
501 // Catch trivial redundancies
502 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
503
504 // Hoisting of scalars and load expressions.
505 FPM.addPass(
506 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
507 FPM.addPass(InstCombinePass());
508
509 FPM.addPass(LibCallsShrinkWrapPass());
510
511 invokePeepholeEPCallbacks(FPM, Level);
512
513 FPM.addPass(
514 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
515
516 // Form canonically associated expression trees, and simplify the trees using
517 // basic mathematical properties. For example, this will form (nearly)
518 // minimal multiplication trees.
519 FPM.addPass(ReassociatePass());
520
521 // Add the primary loop simplification pipeline.
522 // FIXME: Currently this is split into two loop pass pipelines because we run
523 // some function passes in between them. These can and should be removed
524 // and/or replaced by scheduling the loop pass equivalents in the correct
525 // positions. But those equivalent passes aren't powerful enough yet.
526 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
527 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
528 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
529 // `LoopInstSimplify`.
530 LoopPassManager LPM1, LPM2;
531
532 // Simplify the loop body. We do this initially to clean up after other loop
533 // passes run, either when iterating on a loop or on inner loops with
534 // implications on the outer loop.
535 LPM1.addPass(LoopInstSimplifyPass());
536 LPM1.addPass(LoopSimplifyCFGPass());
537
538 // Try to remove as much code from the loop header as possible,
539 // to reduce amount of IR that will have to be duplicated. However,
540 // do not perform speculative hoisting the first time as LICM
541 // will destroy metadata that may not need to be destroyed if run
542 // after loop rotation.
543 // TODO: Investigate promotion cap for O1.
544 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
545 /*AllowSpeculation=*/false));
546
547 LPM1.addPass(
548 LoopRotatePass(/*EnableHeaderDuplication=*/true, isLTOPreLink(Phase)));
549 // TODO: Investigate promotion cap for O1.
550 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
551 /*AllowSpeculation=*/true));
552 LPM1.addPass(SimpleLoopUnswitchPass());
554 LPM1.addPass(LoopFlattenPass());
555
556 LPM2.addPass(LoopIdiomRecognizePass());
557 LPM2.addPass(IndVarSimplifyPass());
558
560
561 LPM2.addPass(LoopDeletionPass());
562
563 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
564 // because it changes the IR, which makes profile annotation in the backend compile
565 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
566 // attributes so we need to make sure and allow the full unroll pass to pay
567 // attention to it.
568 if (!isThinLTOPreLink(Phase) || !PGOOpt ||
569 PGOOpt->Action != PGOOptions::SampleUse)
570 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
571 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
572 PTO.ForgetAllSCEVInLoopUnroll));
573
575
576 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
577 /*UseMemorySSA=*/true));
578 FPM.addPass(
579 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
580 FPM.addPass(InstCombinePass());
581 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
582 // *All* loop passes must preserve it, in order to be able to use it.
583 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
584 /*UseMemorySSA=*/false));
585
586 // Delete small array after loop unroll.
587 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
588
589 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
590 FPM.addPass(MemCpyOptPass());
591
592 // Sparse conditional constant propagation.
593 // FIXME: It isn't clear why we do this *after* loop passes rather than
594 // before...
595 FPM.addPass(SCCPPass());
596
597 // Delete dead bit computations (instcombine runs after to fold away the dead
598 // computations, and then ADCE will run later to exploit any new DCE
599 // opportunities that creates).
600 FPM.addPass(BDCEPass());
601
602 // Run instcombine after redundancy and dead bit elimination to exploit
603 // opportunities opened up by them.
604 FPM.addPass(InstCombinePass());
605 invokePeepholeEPCallbacks(FPM, Level);
606
607 FPM.addPass(CoroElidePass());
608
610
611 // Finally, do an expensive DCE pass to catch all the dead code exposed by
612 // the simplifications and basic cleanup after all the simplifications.
613 // TODO: Investigate if this is too expensive.
614 FPM.addPass(ADCEPass());
615 FPM.addPass(
616 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
617 FPM.addPass(InstCombinePass());
618 invokePeepholeEPCallbacks(FPM, Level);
619
620 return FPM;
621}
622
626 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
627
628 // The O1 pipeline has a separate pipeline creation function to simplify
629 // construction readability.
630 if (Level.getSpeedupLevel() == 1)
631 return buildO1FunctionSimplificationPipeline(Level, Phase);
632
634
637
638 // Form SSA out of local memory accesses after breaking apart aggregates into
639 // scalars.
641
642 // Catch trivial redundancies
643 FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
646
647 // Hoisting of scalars and load expressions.
648 if (EnableGVNHoist)
649 FPM.addPass(GVNHoistPass());
650
651 // Global value numbering based sinking.
652 if (EnableGVNSink) {
653 FPM.addPass(GVNSinkPass());
654 FPM.addPass(
655 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
656 }
657
658 // Speculative execution if the target has divergent branches; otherwise nop.
659 FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
660
661 // Optimize based on known information about branches, and cleanup afterward.
664
665 // Jump table to switch conversion.
668
669 FPM.addPass(
670 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
674
675 invokePeepholeEPCallbacks(FPM, Level);
676
677 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
678 // using the size value profile. Don't perform this when optimizing for size.
679 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse)
681
682 FPM.addPass(TailCallElimPass(/*UpdateFunctionEntryCount=*/
683 isInstrumentedPGOUse()));
684 FPM.addPass(
685 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
686
687 // Form canonically associated expression trees, and simplify the trees using
688 // basic mathematical properties. For example, this will form (nearly)
689 // minimal multiplication trees.
691
694
695 // Add the primary loop simplification pipeline.
696 // FIXME: Currently this is split into two loop pass pipelines because we run
697 // some function passes in between them. These can and should be removed
698 // and/or replaced by scheduling the loop pass equivalents in the correct
699 // positions. But those equivalent passes aren't powerful enough yet.
700 // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
701 // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
702 // fully replace `SimplifyCFGPass`, and the closest to the other we have is
703 // `LoopInstSimplify`.
704 LoopPassManager LPM1, LPM2;
705
706 // Simplify the loop body. We do this initially to clean up after other loop
707 // passes run, either when iterating on a loop or on inner loops with
708 // implications on the outer loop.
709 LPM1.addPass(LoopInstSimplifyPass());
710 LPM1.addPass(LoopSimplifyCFGPass());
711
712 // Try to remove as much code from the loop header as possible,
713 // to reduce amount of IR that will have to be duplicated. However,
714 // do not perform speculative hoisting the first time as LICM
715 // will destroy metadata that may not need to be destroyed if run
716 // after loop rotation.
717 // TODO: Investigate promotion cap for O1.
718 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
719 /*AllowSpeculation=*/false));
720
721 LPM1.addPass(
722 LoopRotatePass(/*EnableHeaderDuplication=*/true, isLTOPreLink(Phase)));
723 // TODO: Investigate promotion cap for O1.
724 LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
725 /*AllowSpeculation=*/true));
726 LPM1.addPass(
727 SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
729 LPM1.addPass(LoopFlattenPass());
730
731 LPM2.addPass(LoopIdiomRecognizePass());
732 LPM2.addPass(IndVarSimplifyPass());
733
734 {
736 ExtraPasses.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
738 LPM2.addPass(std::move(ExtraPasses));
739 }
740
742
743 LPM2.addPass(LoopDeletionPass());
744
745 // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
746 // because it changes the IR, which makes profile annotation in the backend compile
747 // inaccurate. The normal unroller doesn't pay attention to forced full unroll
748 // attributes so we need to make sure and allow the full unroll pass to pay
749 // attention to it.
750 if (!isThinLTOPreLink(Phase) || !PGOOpt ||
751 PGOOpt->Action != PGOOptions::SampleUse)
752 LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
753 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
754 PTO.ForgetAllSCEVInLoopUnroll));
755
757
758 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
759 /*UseMemorySSA=*/true));
760 FPM.addPass(
761 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
763 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
764 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
765 // *All* loop passes must preserve it, in order to be able to use it.
766 FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
767 /*UseMemorySSA=*/false));
768
769 // Delete small array after loop unroll.
771
772 // Try vectorization/scalarization transforms that are both improvements
773 // themselves and can allow further folds with GVN and InstCombine.
774 FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
775
776 // Eliminate redundancies.
778 if (RunNewGVN)
779 FPM.addPass(NewGVNPass());
780 else
781 FPM.addPass(GVNPass());
782
783 // Sparse conditional constant propagation.
784 // FIXME: It isn't clear why we do this *after* loop passes rather than
785 // before...
786 FPM.addPass(SCCPPass());
787
788 // Delete dead bit computations (instcombine runs after to fold away the dead
789 // computations, and then ADCE will run later to exploit any new DCE
790 // opportunities that creates).
791 FPM.addPass(BDCEPass());
792
793 // Run instcombine after redundancy and dead bit elimination to exploit
794 // opportunities opened up by them.
796 invokePeepholeEPCallbacks(FPM, Level);
797
798 // Re-consider control flow based optimizations after redundancy elimination,
799 // redo DCE, etc.
802
805
806 // Finally, do an expensive DCE pass to catch all the dead code exposed by
807 // the simplifications and basic cleanup after all the simplifications.
808 // TODO: Investigate if this is too expensive.
809 FPM.addPass(ADCEPass());
810
811 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
812 FPM.addPass(MemCpyOptPass());
813
814 FPM.addPass(DSEPass());
816
818 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
819 /*AllowSpeculation=*/true),
820 /*UseMemorySSA=*/true));
821
822 FPM.addPass(CoroElidePass());
823
825
827 .convertSwitchRangeToICmp(true)
828 .convertSwitchToArithmetic(true)
829 .hoistCommonInsts(true)
830 .sinkCommonInsts(true)));
832 invokePeepholeEPCallbacks(FPM, Level);
833
834 return FPM;
835}
836
837void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
840}
841
842void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
843 OptimizationLevel Level,
844 ThinOrFullLTOPhase LTOPhase) {
845 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
847 return;
848 InlineParams IP;
849
851
852 // FIXME: The hint threshold has the same value used by the regular inliner
853 // when not optimizing for size. This should probably be lowered after
854 // performance testing.
855 // (FIXME: this comment is cargo culted from the old pass manager; revisit.)
856 IP.HintThreshold = 325;
859 IP, /* MandatoryFirst */ true,
861 CGSCCPassManager &CGPipeline = MIWP.getPM();
862
864 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
865 FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
866 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
867 true))); // Merge & remove basic blocks.
868 FPM.addPass(InstCombinePass()); // Combine silly sequences.
869 invokePeepholeEPCallbacks(FPM, Level);
870
871 CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
872 std::move(FPM), PTO.EagerlyInvalidateAnalyses));
873
874 MPM.addPass(std::move(MIWP));
875
876 // Delete anything that is now dead to make sure that we don't instrument
877 // dead code. Instrumentation can end up keeping dead code around and
878 // dramatically increase code size.
879 MPM.addPass(GlobalDCEPass());
880}
881
882void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
883 OptimizationLevel Level) {
885 // Disable header duplication in loop rotation at -Oz.
887 createFunctionToLoopPassAdaptor(LoopRotatePass(),
888 /*UseMemorySSA=*/false),
889 PTO.EagerlyInvalidateAnalyses));
890 }
891}
892
/// Adds the IR-level PGO passes to \p MPM for an optimized (non-O0) build.
///
/// \p Level must not be O0; this is asserted on entry.
///
/// When \p RunProfileGen is false this is the profile-use path:
/// \p ProfileFile must be non-empty (asserted), PGOInstrumentationUse is
/// added with \p ProfileFile, \p ProfileRemappingFile, \p IsCS and FS,
/// a RequireAnalysisPass for ProfileSummaryAnalysis is added, and the
/// function returns.
///
/// Otherwise instrumentation is generated: PGOInstrumentationGen is added,
/// addPostPGOLoopRotation() is called, and InstrProfilingLoweringPass is added
/// with an InstrProfOptions built from the arguments:
///  - InstrProfileOutput is set to \p ProfileFile when it is non-empty;
///  - DoCounterPromotion is true, unless EnableSampledInstr is set, in which
///    case Sampling is turned on and DoCounterPromotion is turned off;
///  - UseBFIInPromotion is set to \p IsCS;
///  - Atomic is set to \p AtomicCounterUpdate.
///
/// NOTE(review): FS is not a parameter of this function; presumably it is a
/// PassBuilder member declared elsewhere - confirm.
893void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
894 OptimizationLevel Level, bool RunProfileGen,
895 bool IsCS, bool AtomicCounterUpdate,
896 std::string ProfileFile,
897 std::string ProfileRemappingFile) {
898 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
899
900 if (!RunProfileGen) {
901 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
902 MPM.addPass(
903 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
904 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
905 // RequireAnalysisPass for PSI before subsequent non-module passes.
906 MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
907 return;
908 }
909
910 // Perform PGO instrumentation.
// NOTE(review): the continuation of the following call (listing line 912) is
// absent from this listing; only the IsCS-true operand is visible.
911 MPM.addPass(PGOInstrumentationGen(IsCS ? PGOInstrumentationType::CSFDO
913
914 addPostPGOLoopRotation(MPM, Level);
915 // Add the profile lowering pass.
916 InstrProfOptions Options;
917 if (!ProfileFile.empty())
918 Options.InstrProfileOutput = ProfileFile;
919 // Do counter promotion at Level greater than O0.
920 Options.DoCounterPromotion = true;
921 Options.UseBFIInPromotion = IsCS;
922 if (EnableSampledInstr) {
923 Options.Sampling = true;
924 // With sampling, there is little benefit to enabling counter promotion.
925 // But note that sampling does work with counter promotion.
926 Options.DoCounterPromotion = false;
927 }
928 Options.Atomic = AtomicCounterUpdate;
929 MPM.addPass(InstrProfilingLoweringPass(Options, IsCS));
930}
931
933 bool RunProfileGen, bool IsCS,
934 bool AtomicCounterUpdate,
935 std::string ProfileFile,
936 std::string ProfileRemappingFile) {
937 if (!RunProfileGen) {
938 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
939 MPM.addPass(
940 PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
941 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
942 // RequireAnalysisPass for PSI before subsequent non-module passes.
944 return;
945 }
946
947 // Perform PGO instrumentation.
950 // Add the profile lowering pass.
952 if (!ProfileFile.empty())
953 Options.InstrProfileOutput = ProfileFile;
954 // Do not do counter promotion at O0.
955 Options.DoCounterPromotion = false;
956 Options.UseBFIInPromotion = IsCS;
957 Options.Atomic = AtomicCounterUpdate;
959}
960
962 return getInlineParamsFromOptLevel(Level.getSpeedupLevel());
963}
964
968 InlineParams IP;
969 if (PTO.InlinerThreshold == -1)
971 else
972 IP = getInlineParams(PTO.InlinerThreshold);
973 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
974 // set hot-caller threshold to 0 to disable hot
975 // callsite inline (as much as possible [1]) because it makes
976 // profile annotation in the backend inaccurate.
977 //
978 // [1] Note the cost of a function could be below zero due to erased
979 // prologue / epilogue.
980 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
982
983 if (PGOOpt)
985
989
990 // Require the GlobalsAA analysis for the module so we can query it within
991 // the CGSCC pipeline.
993 MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
994 // Invalidate AAManager so it can be recreated and pick up the newly
995 // available GlobalsAA.
996 MIWP.addModulePass(
998 }
999
1000 // Require the ProfileSummaryAnalysis for the module so we can query it within
1001 // the inliner pass.
1003
1004 // Now begin the main postorder CGSCC pipeline.
1005 // FIXME: The current CGSCC pipeline has its origins in the legacy pass
1006 // manager and trying to emulate its precise behavior. Much of this doesn't
1007 // make a lot of sense and we should revisit the core CGSCC structure.
1008 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
1009
1010 // Note: historically, the PruneEH pass was run first to deduce nounwind and
1011 // generally clean up exception handling overhead. It isn't clear this is
1012 // valuable as the inliner doesn't currently care whether it is inlining an
1013 // invoke or a call.
1014
1016 MainCGPipeline.addPass(AttributorCGSCCPass());
1018 MainCGPipeline.addPass(AttributorLightCGSCCPass());
1019
1020 // Deduce function attributes. We do another run of this after the function
1021 // simplification pipeline, so this only needs to run when it could affect the
1022 // function simplification pipeline, which is only the case with recursive
1023 // functions.
1024 MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
1025
1026 // When at O3 add argument promotion to the pass pipeline.
1027 // FIXME: It isn't at all clear why this should be limited to O3.
1028 if (Level == OptimizationLevel::O3)
1029 MainCGPipeline.addPass(ArgumentPromotionPass());
1030
1031 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
1032 // there are no OpenMP runtime calls present in the module.
1033 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
1034 MainCGPipeline.addPass(OpenMPOptCGSCCPass(Phase));
1035
1036 invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
1037
1038 // Add the core function simplification pipeline nested inside the
1039 // CGSCC walk.
1042 PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
1043
1044 // Finally, deduce any function attributes based on the fully simplified
1045 // function.
1046 MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
1047
1048 // Mark that the function is fully simplified and that it shouldn't be
1049 // simplified again if we somehow revisit it due to CGSCC mutations unless
1050 // it's been modified since.
1053
1054 if (!isThinLTOPreLink(Phase)) {
1055 MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
1056 MainCGPipeline.addPass(CoroAnnotationElidePass());
1057 }
1058
1059 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
1060 MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
1062
1063 return MIWP;
1064}
1065
1070
1072 // For PreLinkThinLTO + SamplePGO or PreLinkFullLTO + SamplePGO,
1073 // set hot-caller threshold to 0 to disable hot
1074 // callsite inline (as much as possible [1]) because it makes
1075 // profile annotation in the backend inaccurate.
1076 //
1077 // [1] Note the cost of a function could be below zero due to erased
1078 // prologue / epilogue.
1079 if (isLTOPreLink(Phase) && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1080 IP.HotCallSiteThreshold = 0;
1081
1082 if (PGOOpt)
1084
1085 // The inline deferral logic is used to avoid losing some
1086 // inlining chance in future. It is helpful in SCC inliner, in which
1087 // inlining is processed in bottom-up order.
1088 // While in module inliner, the inlining order is a priority-based order
1089 // by default. The inline deferral is unnecessary there. So we disable the
1090 // inline deferral logic in module inliner.
1091 IP.EnableDeferral = false;
1092
1095 MPM.addPass(GlobalOptPass());
1096 MPM.addPass(GlobalDCEPass());
1097 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false));
1098 }
1099
1102 PTO.EagerlyInvalidateAnalyses));
1103
1104 if (!isThinLTOPreLink(Phase)) {
1107 MPM.addPass(
1109 }
1110
1111 return MPM;
1112}
1113
1117 assert(Level != OptimizationLevel::O0 &&
1118 "Should not be used for O0 pipeline");
1119
1121 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1122
1124
1125 // Place pseudo probe instrumentation as the first pass of the pipeline to
1126 // minimize the impact of optimization changes.
1127 if (PGOOpt && PGOOpt->PseudoProbeForProfiling && !isThinLTOPostLink(Phase))
1129
1130 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1131
1132 // In ThinLTO mode, when flattened profile is used, all the available
1133 // profile information will be annotated in PreLink phase so there is
1134 // no need to load the profile again in PostLink.
1135 bool LoadSampleProfile =
1136 HasSampleProfile && !(FlattenedProfileUsed && isThinLTOPostLink(Phase));
1137
1138 // During the ThinLTO backend phase we perform early indirect call promotion
1139 // here, before globalopt. Otherwise imported available_externally functions
1140 // look unreferenced and are removed. If we are going to load the sample
1141 // profile then defer until later.
1142 // TODO: See if we can move later and consolidate with the location where
1143 // we perform ICP when we are loading a sample profile.
1144 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1145 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1146 // determine whether the new direct calls are annotated with prof metadata.
1147 // Ideally this should be determined from whether the IR is annotated with
1148 // sample profile, and not whether a sample profile was provided on the
1149 // command line. E.g. for flattened profiles where we will not be reloading
1150 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1151 // provide the sample profile file.
1152 if (isThinLTOPostLink(Phase) && !LoadSampleProfile)
1153 MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1154
1155 // Create an early function pass manager to cleanup the output of the
1156 // frontend. Not necessary with LTO post link pipelines since the pre link
1157 // pipeline already cleaned up the frontend output.
1158 if (!isThinLTOPostLink(Phase)) {
1159 // Do basic inference of function attributes from known properties of system
1160 // libraries and other oracles.
1162 MPM.addPass(CoroEarlyPass());
1163
1164 FunctionPassManager EarlyFPM;
1165 EarlyFPM.addPass(EntryExitInstrumenterPass(/*PostInlining=*/false));
1166 // Lower llvm.expect to metadata before attempting transforms.
1167 // Compare/branch metadata may alter the behavior of passes like
1168 // SimplifyCFG.
1170 EarlyFPM.addPass(SimplifyCFGPass());
1172 EarlyFPM.addPass(EarlyCSEPass());
1173 if (Level == OptimizationLevel::O3)
1174 EarlyFPM.addPass(CallSiteSplittingPass());
1176 std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
1177 }
1178
1179 if (LoadSampleProfile) {
1180 // Annotate sample profile right after early FPM to ensure freshness of
1181 // the debug info.
1183 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase, FS));
1184 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1185 // RequireAnalysisPass for PSI before subsequent non-module passes.
1187 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1188 // for the profile annotation to be accurate in the LTO backend.
1189 if (!isLTOPreLink(Phase))
1190 // We perform early indirect call promotion here, before globalopt.
1191 // This is important for the ThinLTO backend phase because otherwise
1192 // imported available_externally functions look unreferenced and are
1193 // removed.
1194 MPM.addPass(
1195 PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1196 }
1197
1198 // Try to perform OpenMP specific optimizations on the module. This is a
1199 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1201
1203 MPM.addPass(AttributorPass());
1206
1207 // Lower type metadata and the type.test intrinsic in the ThinLTO
1208 // post link pipeline after ICP. This is to enable usage of the type
1209 // tests in ICP sequences.
1212
1214
1215 // Interprocedural constant propagation now that basic cleanup has occurred
1216 // and prior to optimizing globals.
1217 // FIXME: This position in the pipeline hasn't been carefully considered in
1218 // years, it should be re-analyzed.
1219 MPM.addPass(
1220 IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/!isLTOPreLink(Phase))));
1221
1222 // Attach metadata to indirect call sites indicating the set of functions
1223 // they may target at run-time. This should follow IPSCCP.
1225
1226 // Optimize globals to try and fold them into constants.
1227 MPM.addPass(GlobalOptPass());
1228
1229 // Create a small function pass pipeline to cleanup after all the global
1230 // optimizations.
1231 FunctionPassManager GlobalCleanupPM;
1232 // FIXME: Should this instead be a run of SROA?
1233 GlobalCleanupPM.addPass(PromotePass());
1234 GlobalCleanupPM.addPass(InstCombinePass());
1235 invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
1236 GlobalCleanupPM.addPass(
1237 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1238 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
1239 PTO.EagerlyInvalidateAnalyses));
1240
1241 // We already asserted this happens in non-FullLTOPostLink earlier.
1242 const bool IsPreLink = !isThinLTOPostLink(Phase);
1243 // Enable contextual profiling instrumentation.
1244 const bool IsCtxProfGen =
1246 const bool IsPGOPreLink = !IsCtxProfGen && PGOOpt && IsPreLink;
1247 const bool IsPGOInstrGen =
1248 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRInstr;
1249 const bool IsPGOInstrUse =
1250 IsPGOPreLink && PGOOpt->Action == PGOOptions::IRUse;
1251 const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
1252 // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
1253 // enable ctx profiling from the frontend.
1255 "Enabling both instrumented PGO and contextual instrumentation is not "
1256 "supported.");
1257 const bool IsCtxProfUse = !UseCtxProfile.empty() && isThinLTOPreLink(Phase);
1258
1259 assert(
1261 "--instrument-cold-function-only-path is provided but "
1262 "--pgo-instrument-cold-function-only is not enabled");
1263 const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly &&
1264 IsPGOPreLink &&
1266
1267 if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1268 IsCtxProfUse || IsColdFuncOnlyInstrGen)
1269 addPreInlinerPasses(MPM, Level, Phase);
1270
1271 // Add all the requested passes for instrumentation PGO, if requested.
1272 if (IsPGOInstrGen || IsPGOInstrUse) {
1273 addPGOInstrPasses(MPM, Level,
1274 /*RunProfileGen=*/IsPGOInstrGen,
1275 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
1276 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1277 } else if (IsCtxProfGen || IsCtxProfUse) {
1279 // In pre-link, we just want the instrumented IR. We use the contextual
1280 // profile in the post-thinlink phase.
1281 // The instrumentation will be removed in post-thinlink after IPO.
1282 // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
1283 // mechanism for GUIDs.
1284 MPM.addPass(AssignGUIDPass());
1285 if (IsCtxProfUse) {
1286 MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true));
1287 return MPM;
1288 }
1289 // Block further inlining in the instrumented ctxprof case. This avoids
1290 // confusingly collecting profiles for the same GUID corresponding to
1291 // different variants of the function. We could do like PGO and identify
1292 // functions by a (GUID, Hash) tuple, but since the ctxprof "use" waits for
1293 // thinlto to happen before performing any further optimizations, it's
1294 // unnecessary to collect profiles for non-prevailing copies.
1296 addPostPGOLoopRotation(MPM, Level);
1298 } else if (IsColdFuncOnlyInstrGen) {
1299 addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
1300 /* AtomicCounterUpdate */ false,
1302 /* ProfileRemappingFile */ "");
1303 }
1304
1305 if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
1306 MPM.addPass(PGOIndirectCallPromotion(false, false));
1307
1308 if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr)
1309 MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile,
1311
1312 if (IsMemprofUse)
1313 MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, FS));
1314
1315 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRUse ||
1316 PGOOpt->Action == PGOOptions::SampleUse))
1317 MPM.addPass(PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1318
1319 MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1320
1323 else
1324 MPM.addPass(buildInlinerPipeline(Level, Phase));
1325
1326 // Remove any dead arguments exposed by cleanups, constant folding globals,
1327 // and argument promotion.
1329
1332
1333 if (!isThinLTOPreLink(Phase))
1334 MPM.addPass(CoroCleanupPass());
1335
1336 // Optimize globals now that functions are fully simplified.
1337 MPM.addPass(GlobalOptPass());
1338 MPM.addPass(GlobalDCEPass());
1339
1340 return MPM;
1341}
1342
1343/// TODO: Should LTO cause any differences to this set of passes?
1344void PassBuilder::addVectorPasses(OptimizationLevel Level,
1346 ThinOrFullLTOPhase LTOPhase) {
1349
1350 // Drop dereferenceable assumes after vectorization, as they are no longer
1351 // needed and can inhibit further optimization.
1352 if (!isLTOPreLink(LTOPhase))
1353 FPM.addPass(DropUnnecessaryAssumesPass(/*DropDereferenceable=*/true));
1354
1356 if (isFullLTOPostLink(LTOPhase)) {
1357 // The vectorizer may have significantly shortened a loop body; unroll
1358 // again. Unroll small loops to hide loop backedge latency and saturate any
1359 // parallel execution resources of an out-of-order processor. We also then
1360 // need to clean up redundancies and loop invariant code.
1361 // FIXME: It would be really good to use a loop-integrated instruction
1362 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1363 // across the loop nests.
1364 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1367 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1369 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1372 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1373 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1374 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1375 // NOTE: we are very late in the pipeline, and we don't have any LICM
1376 // or SimplifyCFG passes scheduled after us, that would cleanup
1377 // the CFG mess this may have created if allowed to modify CFG, so forbid that.
1378
1379 // We also turn on struct to vector canonicalization here, which allows
1380 // converting allocas of homogeneous structs into vector allocas when the
1381 // allocas' users are all memory intrinsics. This allows promotion in some
1382 // cases because structs cannot promote to SSA values, but vectors can. We
1383 // only turn this on after memcpyopt runs because this might hinder
1384 // memcpyopt's optimizations if done before. Look at the documentation for
1385 // `tryCanonicalizeStructToVector` in SROA.cpp to see why.
1387 /*AggregateToVector=*/true)));
1388 }
1389
1390 if (!isFullLTOPostLink(LTOPhase)) {
1391 // Eliminate loads by forwarding stores from the previous iteration to loads
1392 // of the current iteration.
1394 }
1395 // Cleanup after the loop optimization passes.
1396 FPM.addPass(InstCombinePass());
1397
1398 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1399 ExtraFunctionPassManager<ShouldRunExtraVectorPasses> ExtraPasses;
1400 // At higher optimization levels, try to clean up any runtime overlap and
1401 // alignment checks inserted by the vectorizer. We want to track correlated
1402 // runtime checks for two inner loops in the same outer loop, fold any
1403 // common computations, hoist loop-invariant aspects out of any outer loop,
1404 // and unswitch the runtime checks if possible. Once hoisted, we may have
1405 // dead (or speculatable) control flows or more combining opportunities.
1406 ExtraPasses.addPass(EarlyCSEPass());
1407 ExtraPasses.addPass(CorrelatedValuePropagationPass());
1408 ExtraPasses.addPass(InstCombinePass());
1409 LoopPassManager LPM;
1410 LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1411 /*AllowSpeculation=*/true));
1412 LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1414 ExtraPasses.addPass(
1415 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true));
1416 ExtraPasses.addPass(
1417 SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1418 ExtraPasses.addPass(InstCombinePass());
1419 FPM.addPass(std::move(ExtraPasses));
1420 }
1421
1422 // Now that we've formed fast to execute loop structures, we do further
1423 // optimizations. These are run afterward as they might block doing complex
1424 // analyses and transforms such as what are needed for loop vectorization.
1425
1426 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1427 // GVN, loop transforms, and others have already run, so it's now better to
1428 // convert to more optimized IR using more aggressive simplify CFG options.
1429 // The extra sinking transform can create larger basic blocks, so do this
1430 // before SLP vectorization.
1431 FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1432 .forwardSwitchCondToPhi(true)
1433 .convertSwitchRangeToICmp(true)
1434 .convertSwitchToArithmetic(true)
1435 .convertSwitchToLookupTable(true)
1436 .needCanonicalLoops(false)
1437 .hoistCommonInsts(true)
1438 .sinkCommonInsts(true)));
1439
1440 if (isFullLTOPostLink(LTOPhase)) {
1441 FPM.addPass(SCCPPass());
1442 FPM.addPass(InstCombinePass());
1443 FPM.addPass(BDCEPass());
1444 }
1445
1446 // Optimize parallel scalar instruction chains into SIMD instructions.
1447 if (PTO.SLPVectorization) {
1448 FPM.addPass(SLPVectorizerPass());
1449 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1450 FPM.addPass(EarlyCSEPass());
1451 }
1452 }
1453 // Enhance/cleanup vector code.
1454 FPM.addPass(VectorCombinePass());
1455
1456 if (!isFullLTOPostLink(LTOPhase)) {
1457 FPM.addPass(InstCombinePass());
1458 // Unroll small loops to hide loop backedge latency and saturate any
1459 // parallel execution resources of an out-of-order processor. We also then
1460 // need to clean up redundancies and loop invariant code.
1461 // FIXME: It would be really good to use a loop-integrated instruction
1462 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1463 // across the loop nests.
1464 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1465 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1467 LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1468 }
1469 FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
1470 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1471 PTO.ForgetAllSCEVInLoopUnroll)));
1472 FPM.addPass(WarnMissedTransformationsPass());
1473 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1474 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1475 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
1476 // NOTE: we are very late in the pipeline, and we don't have any LICM
1477 // or SimplifyCFG passes scheduled after us, that would cleanup
1478 // the CFG mess this may have created if allowed to modify CFG, so forbid that.
1479
1480 // We also turn on struct to vector canonicalization here, which allows
1481 // converting allocas of homogeneous structs into vector allocas when the
1482 // allocas' users are all memory intrinsics. This allows promotion in some
1483 // cases because structs cannot promote to SSA values, but vectors can. We
1484 // only turn this on after memcpyopt runs because this might hinder
1485 // memcpyopt's optimizations if done before. Look at the documentation for
1486 // `tryCanonicalizeStructToVector` in SROA.cpp to see why.
1487 FPM.addPass(SROAPass(SROAOptions(SROAOptions::PreserveCFG,
1488 /*AggregateToVector=*/true)));
1489 }
1490
1491 FPM.addPass(InferAlignmentPass());
1492 FPM.addPass(InstCombinePass());
1493
1494 // This is needed for two reasons:
1495 // 1. It works around problems that instcombine introduces, such as sinking
1496 // expensive FP divides into loops containing multiplications using the
1497 // divide result.
1498 // 2. It helps to clean up some loop-invariant code created by the loop
1499 // unroll pass when IsFullLTO=false.
1501 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1502 /*AllowSpeculation=*/true),
1503 /*UseMemorySSA=*/true));
1504
1505 // Now that we've vectorized and unrolled loops, we may have more refined
1506 // alignment information, try to re-derive it here.
1507 FPM.addPass(AlignmentFromAssumptionsPass());
1508}
1509
1512 ThinOrFullLTOPhase LTOPhase) {
1514
1515 // Run partial inlining pass to partially inline functions that have
1516 // large bodies.
1519
1520 // Remove avail extern fns and globals definitions since we aren't compiling
1521 // an object file for later LTO. For LTO we want to preserve these so they
1522 // are eligible for inlining at link-time. Note if they are unreferenced they
1523 // will be removed by GlobalDCE later, so this only impacts referenced
1524 // available externally globals. Eventually they will be suppressed during
1525 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1526 // may make globals referenced by available external functions dead and saves
1527 // running remaining passes on the eliminated functions. These should be
1528 // preserved during prelinking for link-time inlining decisions.
1529 if (!isLTOPreLink(LTOPhase))
1531
1532 // Do RPO function attribute inference across the module to forward-propagate
1533 // attributes where applicable.
1534 // FIXME: Is this really an optimization rather than a canonicalization?
1536
1537 // Do a post inline PGO instrumentation and use pass. This is a context
1538 // sensitive PGO pass. We don't want to do this in LTOPreLink phase as
1539 // cross-module inline has not been done yet. The context sensitive
1540 // instrumentation is after all the inlines are done.
1541 if (!isLTOPreLink(LTOPhase) && PGOOpt) {
1542 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1543 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1544 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1545 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
1546 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1547 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1548 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
1549 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
1550 }
1551
1552 // Re-compute GlobalsAA here prior to function passes. This is particularly
1553 // useful as the above will have inlined, DCE'ed, and function-attr
1554 // propagated everything. We should at this point have a reasonably minimal
1555 // and richly annotated call graph. By computing aliasing and mod/ref
1556 // information for all local globals here, the late loop passes and notably
1557 // the vectorizer will be able to use them to help recognize vectorizable
1558 // memory operations.
1561
1562 invokeOptimizerEarlyEPCallbacks(MPM, Level, LTOPhase);
1563
1564 FunctionPassManager OptimizePM;
1565
1566 // Only drop unnecessary assumes post-inline and post-link, as otherwise
1567 // additional uses of the affected value may be introduced through inlining
1568 // and CSE.
1569 if (!isLTOPreLink(LTOPhase))
1570 OptimizePM.addPass(DropUnnecessaryAssumesPass());
1571
1572 // Scheduling LoopVersioningLICM when inlining is over, because after that
1573 // we may see more accurate aliasing. Reason to run this late is that too
1574 // early versioning may prevent further inlining due to increase of code
1575 // size. Other optimizations which run later might benefit from the no-alias
1576 // assumption in the cloned loop.
1578 OptimizePM.addPass(
1580 // LoopVersioningLICM pass might increase new LICM opportunities.
1582 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1583 /*AllowSpeculation=*/true),
1584 /*USeMemorySSA=*/true));
1585 }
1586
1587 OptimizePM.addPass(Float2IntPass());
1589
1590 if (EnableMatrix) {
1591 OptimizePM.addPass(LowerMatrixIntrinsicsPass());
1592 OptimizePM.addPass(EarlyCSEPass());
1593 }
1594
1595 // CHR pass should only be applied with the profile information.
1596 // The check is to check the profile summary information in CHR.
1597 if (EnableCHR && Level == OptimizationLevel::O3)
1598 OptimizePM.addPass(ControlHeightReductionPass());
1599
1600 // FIXME: We need to run some loop optimizations to re-rotate loops after
1601 // simplifycfg and others undo their rotation.
1602
1603 // Optimize the loop execution. These passes operate on entire loop nests
1604 // rather than on each loop in an inside-out manner, and so they are actually
1605 // function passes.
1606
1607 invokeVectorizerStartEPCallbacks(OptimizePM, Level);
1608
1609 LoopPassManager LPM;
1610 // First rotate loops that may have been un-rotated by prior passes.
1611 // Disable header duplication at -Oz.
1612 LPM.addPass(LoopRotatePass(/*EnableLoopHeaderDuplication=*/true,
1613 isLTOPreLink(LTOPhase),
1614 /*CheckExitCount=*/true));
1615 // Some loops may have become dead by now. Try to delete them.
1616 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1617 // this may need to be revisited once we run GVN before loop deletion
1618 // in the simplification pipeline.
1619 LPM.addPass(LoopDeletionPass());
1620
1621 if (PTO.LoopInterchange)
1622 LPM.addPass(LoopInterchangePass());
1623
1624 OptimizePM.addPass(
1625 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
1626
1627 // FIXME: This may not be the right place in the pipeline.
1628 // We need to have the data to support the right place.
1629 if (PTO.LoopFusion)
1630 OptimizePM.addPass(LoopFusePass());
1631
1632 // Distribute loops to allow partial vectorization. I.e. isolate dependences
1633 // into separate loop that would otherwise inhibit vectorization. This is
1634 // currently only performed for loops marked with the metadata
1635 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1636 OptimizePM.addPass(LoopDistributePass());
1637
1638 // Populates the VFABI attribute with the scalar-to-vector mappings
1639 // from the TargetLibraryInfo.
1640 OptimizePM.addPass(InjectTLIMappings());
1641
1642 addVectorPasses(Level, OptimizePM, LTOPhase);
1643
1644 invokeVectorizerEndEPCallbacks(OptimizePM, Level);
1645
1646 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
1647 // canonicalization pass that enables other optimizations. As a result,
1648 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
1649 // result too early.
1650 OptimizePM.addPass(LoopSinkPass());
1651
1652 // And finally clean up LCSSA form before generating code.
1653 OptimizePM.addPass(InstSimplifyPass());
1654
1655 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1656 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1657 // flattening of blocks.
1658 OptimizePM.addPass(DivRemPairsPass());
1659
1660 // Merge adjacent icmps into memcmp, then expand memcmp to loads/compares.
1661 // TODO: move this further up so that it can be optimized by GVN, etc.
1662 if (EnableMergeICmps)
1663 OptimizePM.addPass(MergeICmpsPass());
1664 OptimizePM.addPass(ExpandMemCmpPass());
1665
1666 // Try to annotate calls that were created during optimization.
1667 OptimizePM.addPass(
1668 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
1669
1670 // LoopSink (and other loop passes since the last simplifyCFG) might have
1671 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1672 OptimizePM.addPass(
1674 .convertSwitchRangeToICmp(true)
1675 .convertSwitchToArithmetic(true)
1676 .speculateUnpredictables(true)
1677 .hoistLoadsStoresWithCondFaulting(true)));
1678
1679 // Add the core optimizing pipeline.
1680 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
1681 PTO.EagerlyInvalidateAnalyses));
1682
1683 // AllocToken transforms heap allocation calls; this needs to run late after
1684 // other allocation call transformations (such as those in InstCombine).
1685 if (!isLTOPreLink(LTOPhase))
1686 MPM.addPass(AllocTokenPass());
1687
1688 invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase);
1689
1690 // Run the Instrumentor pass late.
1692 MPM.addPass(InstrumentorPass(FS));
1693
1694 // Split out cold code. Splitting is done late to avoid hiding context from
1695 // other optimizations and inadvertently regressing performance. The tradeoff
1696 // is that this has a higher code size cost than splitting early.
1697 if (EnableHotColdSplit && !isLTOPreLink(LTOPhase))
1699
1700 // Search the code for similar regions of code. If enough similar regions can
1701 // be found where extracting the regions into their own function will decrease
1702 // the size of the program, we extract the regions, and deduplicate the
1703 // structurally similar regions.
1704 if (EnableIROutliner)
1705 MPM.addPass(IROutlinerPass());
1706
1707 // Now we need to do some global optimization transforms.
1708 // FIXME: It would seem like these should come first in the optimization
1709 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1710 // ordering here.
1711 MPM.addPass(GlobalDCEPass());
1713
1714 // Merge functions if requested. It has a better chance to merge functions
1715 // after ConstantMerge folded jump tables.
1716 if (PTO.MergeFunctions)
1718
1719 if (PTO.CallGraphProfile && !isLTOPreLink(LTOPhase))
1720 MPM.addPass(CGProfilePass(isLTOPostLink(LTOPhase)));
1721
1722 // RelLookupTableConverterPass runs later in LTO post-link pipeline.
1723 if (!isLTOPreLink(LTOPhase))
1725
1726 // Add devirtualization pass only when LTO is not enabled, as otherwise
1727 // the pass is already enabled in the LTO pipeline.
1728 if (PTO.DevirtualizeSpeculatively && LTOPhase == ThinOrFullLTOPhase::None) {
1729 // TODO: explore a better pipeline configuration that can improve
1730 // compilation time overhead.
1732 /*ExportSummary*/ nullptr,
1733 /*ImportSummary*/ nullptr,
1734 /*DevirtSpeculatively*/ PTO.DevirtualizeSpeculatively));
1736 // Given that the devirtualization creates more opportunities for inlining,
1737 // we run the Inliner again here to maximize the optimization gain we
1738 // get from devirtualization.
1739 // Also, we can't run devirtualization before inlining because the
1740 // devirtualization depends on the passes optimizing/eliminating vtable GVs
1741 // and those passes are only effective after inlining.
1742 if (EnableModuleInliner) {
1746 } else {
1749 /* MandatoryFirst */ true,
1751 }
1752 }
1753 return MPM;
1754}
1755
1759 if (Level == OptimizationLevel::O0)
1760 return buildO0DefaultPipeline(Level, Phase);
1761
1763 instructionCountersPass(MPM, /* IsPreOptimization */ true);
1764 // Currently this pipeline is only invoked in an LTO pre link pass or when we
1765 // are not running LTO. If that changes the below checks may need updating.
1767
1768 // If we are invoking this in non-LTO mode, remove any MemProf related
1769 // attributes and metadata, as we don't know whether we are linking with
1770 // a library containing the necessary interfaces.
1773
1774 // Convert @llvm.global.annotations to !annotation metadata.
1776
1777 // Force any function attributes we want the rest of the pipeline to observe.
1779
1780 if (TriggerCrash)
1782
1783 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1785
1786 // Apply module pipeline start EP callback.
1788
1789 // Add the core simplification pipeline.
1791
1792 // Now add the optimization pipeline.
1794
1795 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1796 PGOOpt->Action == PGOOptions::SampleUse)
1798
1799 // Emit annotation remarks.
1801
1802 if (isLTOPreLink(Phase))
1803 addRequiredLTOPreLinkPasses(MPM);
1804
1805 instructionCountersPass(MPM, /* IsPreOptimization */ false);
1806 return MPM;
1807}
1808
1811 bool EmitSummary) {
1813
1814 instructionCountersPass(MPM, /* IsPreOptimization */ true);
1815
1816 if (ThinLTO)
1818 else
1820 MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary));
1821
1822 // Perform any cleanups to the IR that aren't suitable for per TU compilation,
1823 // like removing CFI/WPD related instructions. Note, we reuse
1824 // DropTypeTestsPass to clean up type tests rather than duplicate that logic
1825 // in FatLtoCleanup.
1826 MPM.addPass(FatLtoCleanup());
1827
1828 // If we're doing FatLTO w/ CFI enabled, we don't want the type tests in the
1829 // object code, only in the bitcode section, so drop it before we run
1830 // module optimization and generate machine code. If llvm.type.test() isn't in
1831 // the IR, this won't do anything.
1833
1834 // Use the ThinLTO post-link pipeline with sample profiling
1835 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1836 MPM.addPass(buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1837 else {
1838 // ModuleSimplification does not run the coroutine passes for
1839 // ThinLTOPreLink, so we need the coroutine passes to run for ThinLTO
1840 // builds, otherwise they will miscompile.
1841 if (ThinLTO) {
1842 // TODO: replace w/ buildCoroWrapper() when it takes phase and level into
1843 // consideration.
1844 CGSCCPassManager CGPM;
1848 MPM.addPass(CoroCleanupPass());
1849 }
1850
1851 // otherwise, just use module optimization
1852 MPM.addPass(
1854 // Emit annotation remarks.
1856 }
1857
1858 instructionCountersPass(MPM, /* IsPreOptimization */ false);
1859
1860 return MPM;
1861}
1862
1865 if (Level == OptimizationLevel::O0)
1867
1869
1870 instructionCountersPass(MPM, /* IsPreOptimization */ true);
1871
1872 // Convert @llvm.global.annotations to !annotation metadata.
1874
1875 // Force any function attributes we want the rest of the pipeline to observe.
1877
1878 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1880
1881 // Apply module pipeline start EP callback.
1883
1884 // If we are planning to perform ThinLTO later, we don't bloat the code with
1885 // unrolling/vectorization/... now. Just simplify the module as much as we
1886 // can.
1889 // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1890 // thinlto use the contextual info to perform imports; then use the contextual
1891 // profile in the post-thinlink phase.
1892 if (!UseCtxProfile.empty()) {
1893 addRequiredLTOPreLinkPasses(MPM);
1894 return MPM;
1895 }
1896
1897 // Run partial inlining pass to partially inline functions that have
1898 // large bodies.
1899 // FIXME: It isn't clear whether this is really the right place to run this
1900 // in ThinLTO. Because there is another canonicalization and simplification
1901 // phase that will run after the thin link, running this here ends up with
1902 // less information than will be available later and it may grow functions in
1903 // ways that aren't beneficial.
1906
1907 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1908 PGOOpt->Action == PGOOptions::SampleUse)
1910
1911 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
1912 // optimization is going to be done in PostLink stage, but clang can't add
1913 // callbacks there in case of in-process ThinLTO called by linker.
1918
1919 // Emit annotation remarks.
1921
1922 addRequiredLTOPreLinkPasses(MPM);
1923
1924 instructionCountersPass(MPM, /* IsPreOptimization */ false);
1925
1926 return MPM;
1927}
1928
1930 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1932
1933 instructionCountersPass(MPM, /* IsPreOptimization */ true);
1934
1935 // If we are invoking this without a summary index noting that we are linking
1936 // with a library containing the necessary APIs, remove any MemProf related
1937 // attributes and metadata.
1938 if (!ImportSummary || !ImportSummary->withSupportsHotColdNew())
1940
1941 if (ImportSummary) {
1942 // For ThinLTO we must apply the context disambiguation decisions early, to
1943 // ensure we can correctly match the callsites to summary data.
1946 ImportSummary, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1947
1948 // These passes import type identifier resolutions for whole-program
1949 // devirtualization and CFI. They must run early because other passes may
1950 // disturb the specific instruction patterns that these passes look for,
1951 // creating dependencies on resolutions that may not appear in the summary.
1952 //
1953 // For example, GVN may transform the pattern assume(type.test) appearing in
1954 // two basic blocks into assume(phi(type.test, type.test)), which would
1955 // transform a dependency on a WPD resolution into a dependency on a type
1956 // identifier resolution for CFI.
1957 //
1958 // Also, WPD has access to more precise information than ICP and can
1959 // devirtualize more effectively, so it should operate on the IR first.
1960 //
1961 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1962 // metadata and intrinsics.
1963 MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
1964 MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
1965 }
1966
1967 if (Level == OptimizationLevel::O0) {
1968 // Run a second time to clean up any type tests left behind by WPD for use
1969 // in ICP.
1972
1973 // AllocToken transforms heap allocation calls; this needs to run late after
1974 // other allocation call transformations (such as those in InstCombine).
1975 MPM.addPass(AllocTokenPass());
1976
1977 // Drop available_externally and unreferenced globals. This is necessary
1978 // with ThinLTO in order to avoid leaving undefined references to dead
1979 // globals in the object file.
1981 MPM.addPass(GlobalDCEPass());
1982 return MPM;
1983 }
1984 if (!UseCtxProfile.empty()) {
1985 MPM.addPass(
1987 } else {
1988 // Add the core simplification pipeline.
1991 }
1992 // Now add the optimization pipeline.
1995
1996 // Emit annotation remarks.
1998
1999 instructionCountersPass(MPM, /* IsPreOptimization */ false);
2000
2001 return MPM;
2002}
2003
2006 // FIXME: We should use a customized pre-link pipeline!
2007 return buildPerModuleDefaultPipeline(Level,
2009}
2010
2013 ModuleSummaryIndex *ExportSummary) {
2015
2016 instructionCountersPass(MPM, /* IsPreOptimization */ true);
2017
2019
2020 // If we are invoking this without a summary index noting that we are linking
2021 // with a library containing the necessary APIs, remove any MemProf related
2022 // attributes and metadata.
2023 if (!ExportSummary || !ExportSummary->withSupportsHotColdNew())
2025
2026 // Create a function that performs CFI checks for cross-DSO calls with targets
2027 // in the current module.
2028 MPM.addPass(CrossDSOCFIPass());
2029
2030 if (Level == OptimizationLevel::O0) {
2031 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
2032 // metadata and intrinsics.
2033 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
2034 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2035 // Run a second time to clean up any type tests left behind by WPD for use
2036 // in ICP.
2038
2040
2041 // AllocToken transforms heap allocation calls; this needs to run late after
2042 // other allocation call transformations (such as those in InstCombine).
2043 MPM.addPass(AllocTokenPass());
2044
2046
2047 // Emit annotation remarks.
2049
2050 return MPM;
2051 }
2052
2053 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2054 // Load sample profile before running the LTO optimization pipeline.
2055 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
2056 PGOOpt->ProfileRemappingFile,
2058 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2059 // RequireAnalysisPass for PSI before subsequent non-module passes.
2061 }
2062
2063 // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
2065
2066 // Remove unused virtual tables to improve the quality of code generated by
2067 // whole-program devirtualization and bitset lowering.
2068 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2069
2070 // Do basic inference of function attributes from known properties of system
2071 // libraries and other oracles.
2073
2074 if (Level.getSpeedupLevel() > 1) {
2076 CallSiteSplittingPass(), PTO.EagerlyInvalidateAnalyses));
2077
2078 // Indirect call promotion. This should promote all the targets that are
2079 // left by the earlier promotion pass that promotes intra-module targets.
2080 // This two-step promotion is to save the compile time. For LTO, it should
2081 // produce the same result as if we only do promotion here.
2083 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2084
2085 // Promoting by-reference arguments to by-value exposes more constants to
2086 // IPSCCP.
2087 CGSCCPassManager CGPM;
2090 CGPM.addPass(
2093
2094 // Propagate constants at call sites into the functions they call. This
2095 // opens opportunities for globalopt (and inlining) by substituting function
2096 // pointers passed as arguments to direct uses of functions.
2097 MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/true)));
2098
2099 // Attach metadata to indirect call sites indicating the set of functions
2100 // they may target at run-time. This should follow IPSCCP.
2102 }
2103
2104 // Do RPO function attribute inference across the module to forward-propagate
2105 // attributes where applicable.
2106 // FIXME: Is this really an optimization rather than a canonicalization?
2108
2109 // Use in-range annotations on GEP indices to split globals where beneficial.
2110 MPM.addPass(GlobalSplitPass());
2111
2112 // Run whole program optimization of virtual call when the list of callees
2113 // is fixed.
2114 MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
2115
2117 // Stop here at -O1.
2118 if (Level == OptimizationLevel::O1) {
2119 // The LowerTypeTestsPass needs to run to lower type metadata and the
2120 // type.test intrinsics. The pass does nothing if CFI is disabled.
2121 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2122 // Run a second time to clean up any type tests left behind by WPD for use
2123 // in ICP (which is performed earlier than this in the regular LTO
2124 // pipeline).
2126
2128
2129 // AllocToken transforms heap allocation calls; this needs to run late after
2130 // other allocation call transformations (such as those in InstCombine).
2131 MPM.addPass(AllocTokenPass());
2132
2134
2135 // Emit annotation remarks.
2137
2138 instructionCountersPass(MPM, /* IsPreOptimization */ false);
2139
2140 return MPM;
2141 }
2142
2143 // TODO: Skip to match buildCoroWrapper.
2144 MPM.addPass(CoroEarlyPass());
2145
2146 // Optimize globals to try and fold them into constants.
2147 MPM.addPass(GlobalOptPass());
2148
2149 // Promote any localized globals to SSA registers.
2151
2152 // Linking modules together can lead to duplicate global constants; only
2153 // keep one copy of each constant.
2155
2156 // Remove unused arguments from functions.
2158
2159 // Reduce the code after globalopt and ipsccp. Both can open up significant
2160 // simplification opportunities, and both can propagate functions through
2161 // function pointers. When this happens, we often have to resolve varargs
2162 // calls, etc, so let instcombine do this.
2163 FunctionPassManager PeepholeFPM;
2164 PeepholeFPM.addPass(InstCombinePass());
2165 if (Level.getSpeedupLevel() > 1)
2166 PeepholeFPM.addPass(AggressiveInstCombinePass());
2167 invokePeepholeEPCallbacks(PeepholeFPM, Level);
2168
2169 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
2170 PTO.EagerlyInvalidateAnalyses));
2171
2172 // Lower variadic functions for supported targets prior to inlining.
2174
2175 // Note: historically, the PruneEH pass was run first to deduce nounwind and
2176 // generally clean up exception handling overhead. It isn't clear this is
2177 // valuable as the inliner doesn't currently care whether it is inlining an
2178 // invoke or a call.
2179 // Run the inliner now.
2180 if (EnableModuleInliner) {
2184 } else {
2187 /* MandatoryFirst */ true,
2190 }
2191
2192 // Perform context disambiguation after inlining, since that would reduce the
2193 // amount of additional cloning required to distinguish the allocation
2194 // contexts.
2197 /*Summary=*/nullptr,
2198 PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
2199
2200 // Optimize globals again after we ran the inliner.
2201 MPM.addPass(GlobalOptPass());
2202
2203 // Run the OpenMPOpt pass again after global optimizations.
2205
2206 // Garbage collect dead functions.
2207 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2208
2209 // If we didn't decide to inline a function, check to see if we can
2210 // transform it to pass arguments by value instead of by reference.
2211 CGSCCPassManager CGPM;
2216
2218 // The IPO Passes may leave cruft around. Clean up after them.
2219 FPM.addPass(InstCombinePass());
2220 invokePeepholeEPCallbacks(FPM, Level);
2221
2224
2226
2227 // Do a post inline PGO instrumentation and use pass. This is a context
2228 // sensitive PGO pass.
2229 if (PGOOpt) {
2230 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
2231 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
2232 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2233 PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile);
2234 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
2235 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
2236 /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
2237 PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
2238 }
2239
2240 // Break up allocas
2242
2243 // LTO provides additional opportunities for tailcall elimination due to
2244 // link-time inlining, and visibility of nocapture attribute.
2245 FPM.addPass(
2246 TailCallElimPass(/*UpdateFunctionEntryCount=*/isInstrumentedPGOUse()));
2247
2248 // Run a few AA driver optimizations here and now to clean up the code.
2249 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
2250 PTO.EagerlyInvalidateAnalyses));
2251
2252 MPM.addPass(
2254
2255 // Require the GlobalsAA analysis for the module so we can query it within
2256 // MainFPM.
2259 // Invalidate AAManager so it can be recreated and pick up the newly
2260 // available GlobalsAA.
2261 MPM.addPass(
2263 }
2264
2265 FunctionPassManager MainFPM;
2267 LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
2268 /*AllowSpeculation=*/true),
2269 /*USeMemorySSA=*/true));
2270
2271 if (RunNewGVN)
2272 MainFPM.addPass(NewGVNPass());
2273 else
2274 MainFPM.addPass(GVNPass());
2275
2276 // Remove dead memcpy()'s.
2277 MainFPM.addPass(MemCpyOptPass());
2278
2279 // Nuke dead stores.
2280 MainFPM.addPass(DSEPass());
2281 MainFPM.addPass(MoveAutoInitPass());
2283
2284 invokeVectorizerStartEPCallbacks(MainFPM, Level);
2285
2286 LoopPassManager LPM;
2287 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
2288 LPM.addPass(LoopFlattenPass());
2289 LPM.addPass(IndVarSimplifyPass());
2290 LPM.addPass(LoopDeletionPass());
2291 // FIXME: Add loop interchange.
2292
2293 // Unroll small loops and perform peeling.
2294 LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
2295 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
2296 PTO.ForgetAllSCEVInLoopUnroll));
2297 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
2298 // *All* loop passes must preserve it, in order to be able to use it.
2299 MainFPM.addPass(
2300 createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false));
2301
2302 MainFPM.addPass(LoopDistributePass());
2303
2304 addVectorPasses(Level, MainFPM, ThinOrFullLTOPhase::FullLTOPostLink);
2305
2306 invokeVectorizerEndEPCallbacks(MainFPM, Level);
2307
2308 // Run the OpenMPOpt CGSCC pass again late.
2311
2312 invokePeepholeEPCallbacks(MainFPM, Level);
2313 MainFPM.addPass(JumpThreadingPass());
2314 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
2315 PTO.EagerlyInvalidateAnalyses));
2316
2317 // Lower type metadata and the type.test intrinsic. This pass supports
2318 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
2319 // to be run at link time if CFI is enabled. This pass does nothing if
2320 // CFI is disabled.
2321 MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
2322 // Run a second time to clean up any type tests left behind by WPD for use
2323 // in ICP (which is performed earlier than this in the regular LTO pipeline).
2325
2326 // Enable splitting late in the FullLTO post-link pipeline.
2329
2330 // Add late LTO optimization passes.
2331 FunctionPassManager LateFPM;
2332
2333 // LoopSink pass sinks instructions hoisted by LICM, which serves as a
2334 // canonicalization pass that enables other optimizations. As a result,
2335 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
2336 // result too early.
2337 LateFPM.addPass(LoopSinkPass());
2338
2339 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
2340 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
2341 // flattening of blocks.
2342 LateFPM.addPass(DivRemPairsPass());
2343
2344 // Delete basic blocks, which optimization passes may have killed.
2346 .convertSwitchRangeToICmp(true)
2347 .convertSwitchToArithmetic(true)
2348 .hoistCommonInsts(true)
2349 .speculateUnpredictables(true)));
2350 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
2351
2352 // Drop bodies of available externally objects to improve GlobalDCE.
2354
2355 // Now that we have optimized the program, discard unreachable functions.
2356 MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
2357
2358 if (PTO.MergeFunctions)
2360
2362
2363 if (PTO.CallGraphProfile)
2364 MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
2365
2366 MPM.addPass(CoroCleanupPass());
2367
2368 // AllocToken transforms heap allocation calls; this needs to run late after
2369 // other allocation call transformations (such as those in InstCombine).
2370 MPM.addPass(AllocTokenPass());
2371
2373
2374 // Emit annotation remarks.
2376
2377 instructionCountersPass(MPM, /* IsPreOptimization */ false);
2378
2379 return MPM;
2380}
2381
2385 assert(Level == OptimizationLevel::O0 &&
2386 "buildO0DefaultPipeline should only be used with O0");
2387
2389
2390 instructionCountersPass(MPM, /* IsPreOptimization */ true);
2391
2392 // Perform pseudo probe instrumentation in O0 mode. This is for the
2393 // consistency between different build modes. For example, a LTO build can be
2394 // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
2395 // the postlink will require pseudo probe instrumentation in the prelink.
2396 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2398
2399 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2400 PGOOpt->Action == PGOOptions::IRUse))
2402 MPM,
2403 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2404 /*IsCS=*/false, PGOOpt->AtomicCounterUpdate, PGOOpt->ProfileFile,
2405 PGOOpt->ProfileRemappingFile);
2406
2407 // Instrument function entry and exit before all inlining.
2409 EntryExitInstrumenterPass(/*PostInlining=*/false)));
2410
2412
2413 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2415
2416 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
2417 // Explicitly disable sample loader inlining and use flattened profile in O0
2418 // pipeline.
2419 MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
2420 PGOOpt->ProfileRemappingFile,
2422 /*DisableSampleProfileInlining=*/true,
2423 /*UseFlattenedProfile=*/true));
2424 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
2425 // RequireAnalysisPass for PSI before subsequent non-module passes.
2427 }
2428
2430
2431 // Build a minimal pipeline based on the semantics required by LLVM,
2432 // which is just that always inlining occurs. Further, disable generating
2433 // lifetime intrinsics to avoid enabling further optimizations during
2434 // code generation.
2436 /*InsertLifetimeIntrinsics=*/false));
2437
2438 if (PTO.MergeFunctions)
2440
2441 if (EnableMatrix)
2442 MPM.addPass(
2444
2445 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2446 CGSCCPassManager CGPM;
2448 if (!CGPM.isEmpty())
2450 }
2451 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2452 LoopPassManager LPM;
2454 if (!LPM.isEmpty()) {
2456 createFunctionToLoopPassAdaptor(std::move(LPM))));
2457 }
2458 }
2459 if (!LoopOptimizerEndEPCallbacks.empty()) {
2460 LoopPassManager LPM;
2462 if (!LPM.isEmpty()) {
2464 createFunctionToLoopPassAdaptor(std::move(LPM))));
2465 }
2466 }
2467 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2470 if (!FPM.isEmpty())
2471 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2472 }
2473
2475
2476 if (!VectorizerStartEPCallbacks.empty()) {
2479 if (!FPM.isEmpty())
2480 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2481 }
2482
2483 if (!VectorizerEndEPCallbacks.empty()) {
2486 if (!FPM.isEmpty())
2487 MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
2488 }
2489
2491
2492 // AllocToken transforms heap allocation calls; this needs to run late after
2493 // other allocation call transformations (such as those in InstCombine).
2494 if (!isLTOPreLink(Phase))
2495 MPM.addPass(AllocTokenPass());
2496
2498
2500 MPM.addPass(InstrumentorPass(FS));
2501
2502 if (isLTOPreLink(Phase))
2503 addRequiredLTOPreLinkPasses(MPM);
2504
2505 // Emit annotation remarks.
2507
2508 instructionCountersPass(MPM, /* IsPreOptimization */ false);
2509
2510 return MPM;
2511}
2512
2514 AAManager AA;
2515
2516 // The order in which these are registered determines their priority when
2517 // being queried.
2518
2519 // Add any target-specific alias analyses that should be run early.
2520 if (TM)
2521 TM->registerEarlyDefaultAliasAnalyses(AA);
2522
2523 // First we register the basic alias analysis that provides the majority of
2524 // per-function local AA logic. This is a stateless, on-demand local set of
2525 // AA techniques.
2526 AA.registerFunctionAnalysis<BasicAA>();
2527
2528 // Next we query fast, specialized alias analyses that wrap IR-embedded
2529 // information about aliasing.
2530 AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2531 AA.registerFunctionAnalysis<TypeBasedAA>();
2532
2533 // Add support for querying global aliasing information when available.
2534 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2535 // analysis, all that the `AAManager` can do is query for any *cached*
2536 // results from `GlobalsAA` through a readonly proxy.
2538 AA.registerModuleAnalysis<GlobalsAA>();
2539
2540 // Add target-specific alias analyses.
2541 if (TM)
2542 TM->registerDefaultAliasAnalyses(AA);
2543
2544 return AA;
2545}
2546
// Returns true when PGOOpt is set and its Action is PGOOptions::IRUse, or when
// the UseCtxProfile option (-use-ctx-profile, a contextual profile file path)
// is non-empty; returns false otherwise.
2547bool PassBuilder::isInstrumentedPGOUse() const {
2548 return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) ||
2549 !UseCtxProfile.empty();
2550}
aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...
Provides passes to inlining "always_inline" functions.
This is the interface for LLVM's primary stateless and local alias analysis.
This file provides the interface for LLVM's Call Graph Profile pass.
This header provides classes for managing passes over SCCs of the call graph.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file provides the interface for a simple, fast CSE pass.
This file provides a pass which clones the current module and runs the provided pass pipeline on the ...
This file provides a pass manager that only runs its passes if the provided marker analysis has been ...
Super simple passes to force specific function attrs from the commandline into the IR for debugging p...
Provides passes for computing function attributes based on interprocedural analyses.
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
This is the interface for a simple mod/ref and alias analysis over globals.
AcceleratorCodeSelection - Identify all functions reachable from a kernel, removing those that are un...
This header defines various interfaces for pass management in LLVM.
Interfaces for passes which infer implicit function attributes from the name and signature of functio...
This file provides the primary interface to the instcombine pass.
Defines passes for running instruction simplification across chunks of IR.
This file provides the interface for LLVM's PGO Instrumentation lowering pass.
See the comments on JumpThreadingPass.
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the Loop Fusion pass.
This header defines the LoopLoadEliminationPass object.
This header provides classes for managing a pipeline of passes over loops in LLVM IR.
The header file for the LowerConstantIntrinsics pass as used by the new pass manager.
The header file for the LowerExpectIntrinsic pass as used by the new pass manager.
This pass performs merges of loads and stores on both sides of a diamond (hammock).
This file provides the interface for LLVM's Global Value Numbering pass.
This header enumerates the LLVM-provided high-level optimization levels.
This file provides the interface for IR based instrumentation passes ( (profile-gen,...
Define option tunables for PGO.
static bool isThinLTOPostLink(ThinOrFullLTOPhase Phase)
static void addAnnotationRemarksPass(ModulePassManager &MPM)
static CoroConditionalWrapper buildCoroWrapper(ThinOrFullLTOPhase Phase)
static bool isFullLTOPostLink(ThinOrFullLTOPhase Phase)
static bool isThinLTOPreLink(ThinOrFullLTOPhase Phase)
static bool isLTOPreLink(ThinOrFullLTOPhase Phase)
static void instructionCountersPass(ModulePassManager &MPM, bool IsPreOptimization)
static bool isFullLTOPreLink(ThinOrFullLTOPhase Phase)
static bool isLTOPostLink(ThinOrFullLTOPhase Phase)
This file implements relative lookup table converter that converts lookup tables to relative lookup t...
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file provides the interface for the pseudo probe implementation for AutoFDO.
This file provides the interface for the sampled PGO loader pass.
This is the interface for a metadata-based scoped no-alias analysis.
This file provides the interface for the pass responsible for both simplifying and canonicalizing the...
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
This is the interface for a metadata-based TBAA.
Defines the virtual file system interface vfs::FileSystem.
A manager for alias analyses.
A module pass that rewrites heap allocations to use token-enabled allocation functions based on vario...
Definition AllocToken.h:36
Inlines functions marked as "always_inline".
Argument promotion pass.
Assign a GUID to functions as metadata.
Analysis pass providing a never-invalidated alias analysis result.
Simple pass that canonicalizes aliases.
A pass that merges duplicate global constants into a single constant.
This class implements a trivial dead store elimination.
Eliminate dead arguments (and return values) from functions.
A pass that transforms external global definitions into declarations.
Pass embeds a copy of the module optimized with the provided pass pipeline into a global variable.
A pass manager to run a set of extra loop passes if the MarkerTy analysis is present.
Statistics pass for the FunctionPropertiesAnalysis results.
The core GVN pass object.
Definition GVN.h:131
Pass to remove unused function declarations.
Definition GlobalDCE.h:38
Optimize globals that never have their address taken.
Definition GlobalOpt.h:25
Pass to perform split of global variables.
Definition GlobalSplit.h:26
Analysis pass providing a never-invalidated alias analysis result.
Pass to outline cold regions.
Pass to perform interprocedural constant propagation.
Definition SCCP.h:48
Pass to outline similar regions.
Definition IROutliner.h:467
Run instruction simplification across each instruction in the function.
Instrumentation based profiling lowering pass.
The Instrumentor pass.
This pass performs 'jump threading', which looks at blocks that have multiple predecessors and multip...
Performs Loop Invariant Code Motion Pass.
Definition LICM.h:66
Loop unroll pass that only does full loop unrolling and peeling.
Performs Loop Idiom Recognize Pass.
Performs Loop Inst Simplify Pass.
A simple loop rotation transformation.
Performs basic CFG simplifications to assist other loop passes.
A pass that does profile-guided sinking of instructions into loops.
Definition LoopSink.h:33
A simple loop rotation transformation.
Loop unroll pass that will support both full and partial unrolling.
Strips MemProf attributes and metadata.
Merge identical functions.
The module inliner pass for the new pass manager.
Module pass, wrapping the inliner pass.
Definition Inliner.h:65
void addModulePass(T Pass)
Add a module pass that runs before the CGSCC passes.
Definition Inliner.h:81
Class to hold module path string table and global value map, and encapsulate methods for operating on...
Simple pass that provides a name to every anonymous globals.
Additional 'norecurse' attribute deduction during postlink LTO phase.
OpenMP optimizations pass.
Definition OpenMPOpt.h:42
static LLVM_ABI const OptimizationLevel O3
Optimize for fast execution as much as possible.
static LLVM_ABI const OptimizationLevel O0
Disable as many optimizations as possible.
static LLVM_ABI const OptimizationLevel O2
Optimize for fast execution as much as possible without triggering significant incremental compile ti...
static LLVM_ABI const OptimizationLevel O1
Optimize quickly without destroying debuggability.
static LLVM_ABI bool isCtxIRPGOInstrEnabled()
The indirect function call promotion pass.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The instrumentation (profile-instr-gen) pass for IR based PGO.
The profile annotation (profile-instr-use) pass for IR based PGO.
The profile size based optimization pass for memory intrinsics.
Pass to remove unused function declarations.
LLVM_ABI void invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining as well as the inlining-driven cleanups.
LLVM_ABI void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI AAManager buildDefaultAAPipeline()
Build the default AAManager with the default alias analysis pipeline registered.
LLVM_ABI void invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, ThinLTO-targeting default optimization pipeline to a pass manager.
LLVM_ABI void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, bool AtomicCounterUpdate, std::string ProfileFile, std::string ProfileRemappingFile)
Add PGO instrumentation passes for O0 only.
LLVM_ABI void invokeScalarOptimizerLateEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build a per-module default optimization pipeline.
LLVM_ABI void invokePipelineStartEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI void invokeVectorizerEndEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase=ThinOrFullLTOPhase::None)
Build an O0 pipeline with the minimal semantically required passes.
LLVM_ABI FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM function canonicalization and simplification pipeline.
LLVM_ABI void invokePeepholeEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level)
LLVM_ABI void invokePipelineEarlySimplificationEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI void invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, ModuleSummaryIndex *ExportSummary)
Build an LTO default optimization pipeline to a pass manager.
LLVM_ABI ModulePassManager buildModuleInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the module pipeline that performs inlining with module inliner pass.
LLVM_ABI ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary)
Build a ThinLTO default optimization pipeline to a pass manager.
LLVM_ABI void invokeLateLoopOptimizationsEPCallbacks(LoopPassManager &LPM, OptimizationLevel Level)
LLVM_ABI void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level)
LLVM_ABI ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, bool EmitSummary)
Build a fat object default optimization pipeline.
LLVM_ABI ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase)
Construct the core LLVM module canonicalization and simplification pipeline.
LLVM_ABI ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase LTOPhase)
Construct the core LLVM module optimization pipeline.
LLVM_ABI void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase Phase)
LLVM_ABI ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level)
Build a pre-link, LTO-targeting default optimization pipeline to a pass manager.
LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same_v< PassT, PassManager > > addPass(PassT &&Pass)
bool isEmpty() const
Returns if the pass manager contains any passes.
unsigned LicmMssaNoAccForPromotionCap
Tuning option to disable promotion to scalars in LICM with MemorySSA, if the number of accesses is too ...
Definition PassBuilder.h:78
bool SLPVectorization
Tuning option to enable/disable SLP vectorization, set based on opt level.
Definition PassBuilder.h:56
int InlinerThreshold
Tuning option to override the default inliner threshold.
Definition PassBuilder.h:92
bool LoopFusion
Tuning option to enable/disable loop fusion. Its default value is false.
Definition PassBuilder.h:66
bool CallGraphProfile
Tuning option to enable/disable call graph profile.
Definition PassBuilder.h:82
bool MergeFunctions
Tuning option to enable/disable function merging.
Definition PassBuilder.h:89
bool ForgetAllSCEVInLoopUnroll
Tuning option to forget all SCEV loops in LoopUnroll.
Definition PassBuilder.h:70
unsigned LicmMssaOptCap
Tuning option to cap the number of calls to retrieve clobbering accesses in MemorySSA,...
Definition PassBuilder.h:74
bool LoopInterleaving
Tuning option to set loop interleaving on/off, set based on opt level.
Definition PassBuilder.h:48
LLVM_ABI PipelineTuningOptions()
Constructor sets pipeline tuning defaults based on cl::opts.
bool LoopUnrolling
Tuning option to enable/disable loop unrolling. Its default value is true.
Definition PassBuilder.h:59
bool LoopInterchange
Tuning option to enable/disable loop interchange.
Definition PassBuilder.h:63
bool LoopVectorization
Tuning option to enable/disable loop vectorization, set based on opt level.
Definition PassBuilder.h:52
Reassociate commutative expressions.
Definition Reassociate.h:74
A pass to do RPO deduction and propagation of function attributes.
This pass performs function-level constant propagation and merging.
Definition SCCP.h:30
The sample profiler data loader pass.
Analysis pass providing a never-invalidated alias analysis result.
This pass transforms loops that contain branches or switches on loop- invariant conditions to have mu...
A pass to simplify and canonicalize the CFG of a function.
Definition SimplifyCFG.h:30
Analysis pass providing a never-invalidated alias analysis result.
Optimize scalar/vector interactions in IR using target cost models.
Interfaces for registering analysis passes, producing common pass manager configurations,...
Abstract Attribute helper functions.
Definition Attributor.h:165
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ All
Drop only llvm.assumes using type test value.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI cl::opt< bool > EnableKnowledgeRetention
static cl::opt< bool > RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass"))
static cl::opt< bool > DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, cl::desc("Disable pre-instrumentation inliner"))
static cl::opt< bool > PerformMandatoryInliningsFirst("mandatory-inlining-first", cl::init(false), cl::Hidden, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining"))
static cl::opt< bool > RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Run Partial inlining pass"))
static cl::opt< bool > EnableGVNSink("enable-gvn-sink", cl::desc("Enable the GVN sinking pass (default = off)"))
static cl::opt< bool > EnableModuleInliner("enable-module-inliner", cl::init(false), cl::Hidden, cl::desc("Enable module inliner"))
static cl::opt< bool > EnableEagerlyInvalidateAnalyses("eagerly-invalidate-analyses", cl::init(true), cl::Hidden, cl::desc("Eagerly invalidate more analyses in default pipelines"))
static cl::opt< bool > EnableMatrix("enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics"))
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
cl::opt< std::string > UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden, cl::desc("Use the specified contextual profile file"))
static cl::opt< bool > EnableSampledInstr("enable-sampled-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable profile instrumentation sampling (default = off)"))
static cl::opt< bool > EnableLoopFlatten("enable-loop-flatten", cl::init(false), cl::Hidden, cl::desc("Enable the LoopFlatten Pass"))
static cl::opt< InliningAdvisorMode > UseInlineAdvisor("enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"), cl::values(clEnumValN(InliningAdvisorMode::Default, "default", "Heuristics-based inliner version"), clEnumValN(InliningAdvisorMode::Development, "development", "Use development mode (runtime-loadable model)"), clEnumValN(InliningAdvisorMode::Release, "release", "Use release mode (AOT-compiled model)")))
static cl::opt< bool > EnableJumpTableToSwitch("enable-jump-table-to-switch", cl::init(true), cl::desc("Enable JumpTableToSwitch pass (default = true)"))
PassManager< LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, CGSCCUpdateResult & > CGSCCPassManager
The CGSCC pass manager.
static cl::opt< bool > EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable Unroll And Jam Pass"))
@ CGSCC_LIGHT
@ MODULE_LIGHT
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition Pass.h:77
@ FullLTOPreLink
Full LTO prelink phase.
Definition Pass.h:85
@ ThinLTOPostLink
ThinLTO postlink (backend compile) phase.
Definition Pass.h:83
@ None
No LTO/ThinLTO behavior needed.
Definition Pass.h:79
@ FullLTOPostLink
Full LTO postlink (backend compile) phase.
Definition Pass.h:87
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
Definition Pass.h:81
PassManager< Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater & > LoopPassManager
The Loop pass manager.
static cl::opt< bool > EnableConstraintElimination("enable-constraint-elimination", cl::init(true), cl::Hidden, cl::desc("Enable pass to eliminate conditions based on linear constraints"))
ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass)
A function to deduce a CGSCC pass type and wrap it in the templated adaptor.
static cl::opt< bool > EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), cl::Hidden, cl::desc("Enable inline deferral during PGO"))
Flag to enable inline deferral during PGO.
FunctionToLoopPassAdaptor createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA=false)
A function to deduce a loop pass type and wrap it in the templated adaptor.
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)
A function to deduce a function pass type and wrap it in the templated adaptor.
LLVM_ABI cl::opt< bool > ForgetSCEVInLoopUnroll
PassManager< Module > ModulePassManager
Convenience typedef for a pass manager over modules.
static cl::opt< bool > EnablePostPGOLoopRotation("enable-post-pgo-loop-rotation", cl::init(true), cl::Hidden, cl::desc("Run the loop rotation transformation after PGO instrumentation"))
LLVM_ABI bool AreStatisticsEnabled()
Check if statistics are enabled.
static cl::opt< std::string > InstrumentColdFuncOnlyPath("instrument-cold-function-only-path", cl::init(""), cl::desc("File path for cold function only instrumentation(requires use " "with --pgo-instrument-cold-function-only)"), cl::Hidden)
static cl::opt< bool > EnableGlobalAnalyses("enable-global-analyses", cl::init(true), cl::Hidden, cl::desc("Enable inter-procedural analyses"))
static cl::opt< bool > EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading"), cl::init(false), cl::Hidden)
static cl::opt< bool > FlattenedProfileUsed("flattened-profile-used", cl::init(false), cl::Hidden, cl::desc("Indicate the sample profile being used is flattened, i.e., " "no inline hierarchy exists in the profile"))
static cl::opt< AttributorRunOption > AttributorRun("attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), cl::desc("Enable the attributor inter-procedural deduction pass"), cl::values(clEnumValN(AttributorRunOption::FULL, "full", "enable all full attributor runs"), clEnumValN(AttributorRunOption::LIGHT, "light", "enable all attributor-light runs"), clEnumValN(AttributorRunOption::MODULE, "module", "enable module-wide attributor runs"), clEnumValN(AttributorRunOption::MODULE_LIGHT, "module-light", "enable module-wide attributor-light runs"), clEnumValN(AttributorRunOption::CGSCC, "cgscc", "enable call graph SCC attributor runs"), clEnumValN(AttributorRunOption::CGSCC_LIGHT, "cgscc-light", "enable call graph SCC attributor-light runs"), clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs")))
static cl::opt< bool > EnableLoopInterchange("enable-loopinterchange", cl::init(true), cl::Hidden, cl::desc("Enable the LoopInterchange Pass"))
static cl::opt< bool > ExtraVectorizerPasses("extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization"))
static cl::opt< bool > EnableHotColdSplit("hot-cold-split", cl::desc("Enable hot-cold splitting pass"))
cl::opt< bool > EnableMemProfContextDisambiguation
Enable MemProf context disambiguation for thin link.
static cl::opt< bool > TriggerCrash("opt-pipeline-trigger-crash", cl::init(false), cl::Hidden, cl::desc("Trigger crash in optimization pipeline"))
PassManager< Function > FunctionPassManager
Convenience typedef for a pass manager over functions.
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
cl::opt< bool > PGOInstrumentColdFunctionOnly
static cl::opt< bool > EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)"))
static cl::opt< bool > EnableMergeFunctions("enable-merge-functions", cl::init(false), cl::Hidden, cl::desc("Enable function merging as part of the optimization pipeline"))
static cl::opt< bool > EnableDevirtualizeSpeculatively("enable-devirtualize-speculatively", cl::desc("Enable speculative devirtualization optimization"), cl::init(false))
static cl::opt< bool > EnableGVNHoist("enable-gvn-hoist", cl::desc("Enable the GVN hoisting pass (default = off)"))
LLVM_ABI cl::opt< unsigned > SetLicmMssaNoAccForPromotionCap
LLVM_ABI InlineParams getInlineParamsFromOptLevel(unsigned OptLevel)
Generate the parameters to tune the inline cost analysis based on command line options.
static cl::opt< bool > EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden, cl::desc("Enable ir outliner pass"))
static cl::opt< int > PreInlineThreshold("preinline-threshold", cl::Hidden, cl::init(75), cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)"))
static cl::opt< bool > UseLoopVersioningLICM("enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass"))
cl::opt< unsigned > MaxDevirtIterations("max-devirt-iterations", cl::ReallyHidden, cl::init(4))
LLVM_ABI cl::opt< unsigned > SetLicmMssaOptCap
static cl::opt< bool > EnableInstrumentor("enable-instrumentor", cl::init(false), cl::Hidden, cl::desc("Enable the Instrumentor Pass"))
static cl::opt< bool > EnableMergeICmps("enable-mergeicmps", cl::init(true), cl::Hidden, cl::desc("Enable MergeICmps pass in the optimization pipeline"))
A DCE pass that assumes instructions are dead until proven otherwise.
Definition ADCE.h:31
Pass to convert @llvm.global.annotations to !annotation metadata.
This pass attempts to minimize the number of assumes without losing any information.
A more lightweight version of the Attributor which only runs attribute inference but no simplificatio...
A more lightweight version of the Attributor which only runs attribute inference but no simplificatio...
Hoist/decompose integer division and remainder instructions to enable CFG improvements and better cod...
Definition DivRemPairs.h:23
A simple and fast domtree-based CSE pass.
Definition EarlyCSE.h:31
Pass which forces specific function attributes into the IR, primarily as a debugging tool.
A simple and fast domtree-based GVN pass to hoist common expressions from sibling branches.
Definition GVN.h:506
Uses an "inverted" value numbering to decide the similarity of expressions and sinks similar expressi...
Definition GVN.h:513
A set of parameters to control various transforms performed by IPSCCP pass.
Definition SCCP.h:35
A pass which infers function attributes from the names and signatures of function declarations in a m...
Provides context on when an inline advisor is constructed in the pipeline (e.g., link phase,...
Thresholds to tune inline cost analysis.
Definition InlineCost.h:207
std::optional< int > OptSizeHintThreshold
Threshold to use for callees with inline hint, when the caller is optimized for size.
Definition InlineCost.h:216
std::optional< int > HotCallSiteThreshold
Threshold to use when the callsite is considered hot.
Definition InlineCost.h:228
int DefaultThreshold
The default threshold to start with for a callee.
Definition InlineCost.h:209
std::optional< bool > EnableDeferral
Indicate whether we should allow inline deferral.
Definition InlineCost.h:241
std::optional< int > HintThreshold
Threshold to use for callees with inline hint.
Definition InlineCost.h:212
Options for the frontend instrumentation based profiling pass.
A no-op pass template which simply forces a specific analysis result to be invalidated.
Pass to forward loads in a loop around the backedge to subsequent iterations.
A set of parameters used to control various transforms performed by the LoopUnroll pass.
The LoopVectorize Pass.
Computes function attributes in post-order over the call graph.
A utility pass template to force an analysis result to be available.