LLVM 23.0.0git
GCNSchedStrategy.h
Go to the documentation of this file.
1//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
14#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
15
16#include "GCNRegPressure.h"
17#include "llvm/ADT/DenseMap.h"
23
24namespace llvm {
25
27class SIRegisterInfo;
28class GCNSubtarget;
29class GCNSchedStage;
30
40
41#ifndef NDEBUG
42raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
43#endif
44
45/// This is a minimal scheduler strategy. The main difference between this
46/// and the GenericScheduler is that GCNSchedStrategy uses different
47/// heuristics to determine excess/critical pressure sets.
49protected:
50 SUnit *pickNodeBidirectional(bool &IsTopNode, bool &PickedPending);
51
52 void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
53 const RegPressureTracker &RPTracker,
54 SchedCandidate &Cand, bool &IsPending,
55 bool IsBottomUp);
56
57 void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
58 const RegPressureTracker &RPTracker,
59 const SIRegisterInfo *SRI, unsigned SGPRPressure,
60 unsigned VGPRPressure, bool IsBottomUp);
61
62 /// Estimate how many cycles \p SU must wait due to structural hazards at the
63 /// current boundary cycle. Returns zero when no stall is required.
64 unsigned getStructuralStallCycles(SchedBoundary &Zone, SUnit *SU) const;
65
66 /// Evaluates instructions in the pending queue using a subset of scheduling
67 /// heuristics.
68 ///
69 /// Instructions that cannot be issued due to hardware constraints are placed
70 /// in the pending queue rather than the available queue, making them normally
71 /// invisible to scheduling heuristics. However, in certain scenarios (such as
72 /// avoiding register spilling), it may be beneficial to consider scheduling
73 /// these not-yet-ready instructions.
75 SchedBoundary *Zone) const;
76
77 void printCandidateDecision(const SchedCandidate &Current,
78 const SchedCandidate &Preferred);
79
80 void getRegisterPressures(bool AtTop, const RegPressureTracker &RPTracker,
81 SUnit *SU, std::vector<unsigned> &Pressure,
82 std::vector<unsigned> &MaxPressure,
85 ScheduleDAGMI *DAG, const SIRegisterInfo *SRI);
86
87 std::vector<unsigned> Pressure;
88
89 std::vector<unsigned> MaxPressure;
90
92
94
96
98
99 // Scheduling stages for this strategy.
101
102 // Pointer to the current SchedStageID.
104
105 // GCN RP Tracker for top-down scheduling
107
108 // GCN RP Tracker for bottom-up scheduling
110
111 bool UseGCNTrackers = false;
112
113 std::optional<bool> GCNTrackersOverride;
114
115public:
116 // schedule() has seen register pressure over the critical limits and had to
117 // track register pressure for actual scheduling heuristics.
119
120 // Schedule known to have excess register pressure. Be more conservative in
121 // increasing ILP and preserving VGPRs.
122 bool KnownExcessRP = false;
123
124 // An error margin is necessary because of poor performance of the generic RP
125 // tracker and can be adjusted up for tuning heuristics to try and more
126 // aggressively reduce register pressure.
127 unsigned ErrorMargin = 3;
128
129 // Bias for SGPR limits under a high register pressure.
130 const unsigned HighRPSGPRBias = 7;
131
132 // Bias for VGPR limits under a high register pressure.
133 const unsigned HighRPVGPRBias = 7;
134
136
138
139 unsigned SGPRLimitBias = 0;
140
141 unsigned VGPRLimitBias = 0;
142
144
145 SUnit *pickNode(bool &IsTopNode) override;
146
147 void schedNode(SUnit *SU, bool IsTopNode) override;
148
149 void initialize(ScheduleDAGMI *DAG) override;
150
// Returns the TargetOccupancy value currently stored in this strategy.
151 unsigned getTargetOccupancy() { return TargetOccupancy; }
152
// Overwrites the stored TargetOccupancy with \p Occ.
153 void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }
154
156
157 // Advances stage. Returns true if there are remaining stages.
158 bool advanceStage();
159
160 bool hasNextStage() const;
161
// Whether GCN trackers are in use: GCNTrackersOverride wins when it holds a
// value, otherwise the UseGCNTrackers flag is returned.
162 bool useGCNTrackers() const {
163 return GCNTrackersOverride.value_or(UseGCNTrackers);
164 }
165
167
169
171};
172
173/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
174/// maximum number of waves per simd).
176public:
178 bool IsLegacyScheduler = false);
179};
180
181/// The goal of this scheduling strategy is to maximize ILP for a single wave
182/// (i.e. latency hiding).
184protected:
185 bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
186 SchedBoundary *Zone) const override;
187
188public:
190};
191
192/// The goal of this scheduling strategy is to maximize memory clause for a
193/// single wave.
195protected:
196 bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
197 SchedBoundary *Zone) const override;
198
199public:
201};
202
204 unsigned ScheduleLength;
205 unsigned BubbleCycles;
206
207public:
208 ScheduleMetrics() = default;
209 ScheduleMetrics(unsigned L, unsigned BC)
210 : ScheduleLength(L), BubbleCycles(BC) {}
211 unsigned getLength() const { return ScheduleLength; }
212 unsigned getBubbles() const { return BubbleCycles; }
// Returns BubbleCycles as a fraction of ScheduleLength, scaled by ScaleFactor
// and truncated by integer division; a zero result is reported as 1.
// NOTE(review): divides by ScheduleLength with no zero check, and a
// default-constructed ScheduleMetrics leaves both counters uninitialized --
// confirm callers only query metrics built with a non-zero length.
213 unsigned getMetric() const {
214 unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength;
215 // Metric is zero if the amount of bubbles is less than 1% which is too
216 // small. So, return 1.
217 return Metric ? Metric : 1;
218 }
219 static const unsigned ScaleFactor;
220};
221
223 dbgs() << "\n Schedule Metric (scaled by " << ScheduleMetrics::ScaleFactor
224 << " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/"
225 << Sm.getLength() << " ]\n";
226 return OS;
227}
228
229class GCNScheduleDAGMILive;
232 // The live in/out pressure as indexed by the first or last MI in the region
233 // before scheduling.
235 // The mapping of RegionIDx to key instruction
236 DenseMap<unsigned, MachineInstr *> IdxToInstruction;
237 // Whether we are calculating LiveOuts or LiveIns
238 bool IsLiveOut;
239
240public:
241 RegionPressureMap() = default;
243 : DAG(GCNDAG), IsLiveOut(LiveOut) {}
244 // Build the Instr->LiveReg and RegionIdx->Instr maps
245 void buildLiveRegMap();
246
247 // Retrieve the LiveReg for a given RegionIdx
249 assert(IdxToInstruction.contains(RegionIdx));
250 MachineInstr *Key = IdxToInstruction[RegionIdx];
251 return RegionLiveRegMap[Key];
252 }
253};
254
255/// A region's boundaries i.e. a pair of instruction bundle iterators. The lower
256/// boundary is inclusive, the upper boundary is exclusive.
258 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>;
259
261 friend class GCNSchedStage;
266 friend class PreRARematStage;
268 friend class RegionPressureMap;
269
270 const GCNSubtarget &ST;
271
273
274 // Occupancy target at the beginning of function scheduling cycle.
275 unsigned StartingOccupancy;
276
277 // Minimal real occupancy recorded for the function.
278 unsigned MinOccupancy;
279
280 // Vector of regions recorded for later rescheduling
282
283 // Record regions with high register pressure.
284 BitVector RegionsWithHighRP;
285
286 // Record regions with excess register pressure over the physical register
287 // limit. Register pressure in these regions usually will result in spilling.
288 BitVector RegionsWithExcessRP;
289
290 // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
291 BitVector RegionsWithIGLPInstrs;
292
293 // Region live-in cache.
295
296 // Region pressure cache.
298
299 // Temporary basic block live-in cache.
301
302 // The map of the initial first region instruction to region live in registers
304
305 // Calculate the map of the initial first region instruction to region live in
306 // registers
308
309 // Calculate the map of the initial last region instruction to region live out
310 // registers
312 getRegionLiveOutMap() const;
313
314 // The live out registers per region. These are internally stored as a map of
315 // the initial last region instruction to region live out registers, but can
316 // be retrieved with the regionIdx by calls to getLiveRegsForRegionIdx.
317 RegionPressureMap RegionLiveOuts;
318
319 // Return current region pressure.
320 GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;
321
322 // Compute and cache live-ins and pressure for all regions in block.
323 void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);
324
325 /// Makes the scheduler try to achieve an occupancy of \p TargetOccupancy.
326 void setTargetOccupancy(unsigned TargetOccupancy);
327
328 void runSchedStages();
329
330 std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);
331
332public:
334 std::unique_ptr<MachineSchedStrategy> S);
335
336 void schedule() override;
337
338 void finalizeSchedule() override;
339};
340
341// GCNSchedStrategy applies multiple scheduling stages to a function.
343protected:
345
347
349
351
353
355
356 // The current block being scheduled.
358
359 // Current region index.
360 unsigned RegionIdx = 0;
361
362 // Record the original order of instructions before scheduling.
363 std::vector<MachineInstr *> Unsched;
364
365 // RP before scheduling the current region.
367
368 // RP after scheduling the current region.
370
371 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
372
374
375public:
376 // Initialize state for a scheduling stage. Returns false if the current stage
377 // should be skipped.
378 virtual bool initGCNSchedStage();
379
380 // Finalize state after finishing a scheduling pass on the function.
381 virtual void finalizeGCNSchedStage();
382
383 // Setup for scheduling a region. Returns false if the current region should
384 // be skipped.
385 virtual bool initGCNRegion();
386
387 // Finalize state after scheduling a region.
388 virtual void finalizeGCNRegion();
389
390 // Track whether a new region is also a new MBB.
391 void setupNewBlock();
392
393 // Check result of scheduling.
394 void checkScheduling();
395
396 // computes the given schedule virtual execution time in clocks
397 ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule);
399 unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
400 DenseMap<unsigned, unsigned> &ReadyCycles,
401 const TargetSchedModel &SM);
402
403 // Returns true if scheduling should be reverted.
404 virtual bool shouldRevertScheduling(unsigned WavesAfter);
405
406 // Returns true if current region has known excess pressure.
// Looks up the stage's current RegionIdx in the DAG's RegionsWithExcessRP bit
// vector.
407 bool isRegionWithExcessRP() const {
408 return DAG.RegionsWithExcessRP[RegionIdx];
409 }
410
411 // The region number this stage is currently working on
412 unsigned getRegionIdx() { return RegionIdx; }
413
414 // Returns true if the new schedule may result in more spilling.
415 bool mayCauseSpilling(unsigned WavesAfter);
416
417 /// Sets the schedule of region \p RegionIdx to \p MIOrder. The MIs in \p
418 /// MIOrder must be exactly the same as the ones currently existing inside the
419 /// region, only in a different order that honors def-use chains.
420 void modifyRegionSchedule(unsigned RegionIdx,
422
424
425 virtual ~GCNSchedStage() = default;
426};
427
435
437private:
438 // Record regions with excess archvgpr register pressure over the physical
439 // register limit. Register pressure in these regions usually will result in
440 // spilling.
441 BitVector RegionsWithExcessArchVGPR;
442
443 const SIInstrInfo *TII;
444 const SIRegisterInfo *SRI;
445
446 /// Do a speculative rewrite and collect copy locations. The speculative
447 /// rewrite allows us to calculate the RP of the code after the rewrite, and
448 /// the copy locations allow us to calculate the total cost of copies required
449 /// for the rewrite. Stores the rewritten instructions in \p RewriteCands ,
450 /// the copy locations for uses (of the MFMA result) in \p CopyForUse and the
451 /// copy locations for defs (of the MFMA operands) in \p CopyForDef
452 bool
453 initHeuristics(std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
454 DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
456
457 /// Calculate the rewrite cost and undo the state change (e.g. rewriting) done
458 /// in initHeuristics. Uses \p CopyForUse and \p CopyForDef to calculate copy
459 /// costs, and \p RewriteCands to undo rewriting.
460 int64_t getRewriteCost(
461 ArrayRef<std::pair<MachineInstr *, unsigned>> RewriteCands,
462 const DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
463 const SmallPtrSetImpl<MachineInstr *> &CopyForDef);
464
465 /// Do the final rewrite on \p RewriteCands and insert any needed copies.
466 bool rewrite(ArrayRef<std::pair<MachineInstr *, unsigned>> RewriteCands);
467
468 /// \returns true if this MI is a rewrite candidate.
469 bool isRewriteCandidate(MachineInstr *MI) const;
470
471 /// Resets all candidates in \p RewriteCands back to VGPR form.
472 void resetRewriteCandsToVGPR(
473 ArrayRef<std::pair<MachineInstr *, unsigned>> RewriteCands);
474
475 /// Finds all the reaching defs of \p UseMO and stores the SlotIndexes into \p
476 /// DefIdxs
477 void findReachingDefs(MachineOperand &UseMO, LiveIntervals *LIS,
479
480 /// Finds all the reaching uses of \p DefMI and stores the use operands in \p
481 /// ReachingUses
482 void findReachingUses(MachineInstr *DefMI, LiveIntervals *LIS,
484
485public:
486 bool initGCNSchedStage() override;
487
490};
491
493private:
494 // Save the initial occupancy before starting this stage.
495 unsigned InitialOccupancy;
496 // Save the temporary target occupancy before starting this stage.
497 unsigned TempTargetOccupancy;
498 // Track whether any region was scheduled by this stage.
499 bool IsAnyRegionScheduled;
500
501public:
502 bool initGCNSchedStage() override;
503
504 void finalizeGCNSchedStage() override;
505
506 bool initGCNRegion() override;
507
508 bool shouldRevertScheduling(unsigned WavesAfter) override;
509
512};
513
514// Retry function scheduling if we found resulting occupancy and it is
515// lower than used for other scheduling passes. This will give more freedom
516// to schedule low register pressure blocks.
518public:
519 bool initGCNSchedStage() override;
520
521 bool initGCNRegion() override;
522
523 bool shouldRevertScheduling(unsigned WavesAfter) override;
524
527};
528
529/// Attempts to reduce function spilling or, if there is no spilling, to
530/// increase function occupancy by one with respect to register usage by sinking
531/// rematerializable instructions to their use. When the stage estimates that
532/// reducing spilling or increasing occupancy is possible, it tries to
533/// rematerialize as few registers as possible to reduce potential negative
534/// effects on function latency.
535///
536/// The stage only supports rematerializing registers that meet all of the
537/// following constraints.
538/// 1. The register is virtual and has a single defining instruction.
539/// 2. The single defining instruction is either deemed rematerializable by the
540/// target-independent logic, or if not, has no non-constant and
541/// non-ignorable physical register use.
542/// 3. The register has no virtual register use whose live range would be
543/// extended by the rematerialization.
544/// 4. The register has a single non-debug user in a different region from its
545/// defining region.
546/// 5. The register is not used by or using another register that is going to be
547/// rematerialized.
549private:
550 using RegisterIdx = Rematerializer::RegisterIdx;
551
552 /// A scored rematerialization candidate. Higher scores indicate more
553 /// beneficial rematerializations. A null score indicates the rematerialization
554 /// is not helpful to reduce RP in target regions.
555 struct ScoredRemat {
556 /// The register index handle in the rematerializer.
557 RegisterIdx RegIdx;
558 /// Regions in which the register is live-in/live-out/live anywhere.
559 BitVector LiveIn, LiveOut, Live;
560 /// Subset of \ref Live regions in which the rematerialization is not
561 /// guaranteed to reduce RP (i.e., regions in which the register is not
562 /// live-through and unused).
563 BitVector UnpredictableRPSave;
564 /// Expected register pressure decrease induced by rematerializing this
565 /// candidate.
566 GCNRegPressure RPSave;
567
568 /// Execution frequency information required by scoring heuristics.
569 /// Frequencies are scaled down if they are high to avoid overflow/underflow
570 /// when combining them.
571 struct FreqInfo {
572 /// Per-region execution frequencies. 0 when unknown.
574 /// Minimum and maximum observed frequencies.
576
578
579 private:
580 static const uint64_t ScaleFactor = 1024;
581 };
582
583 /// Initializes the candidate with state-independent characteristics for
584 /// rematerializable register with index handle \p RegIdx. This doesn't
585 /// update the actual score (call \ref update for this).
586 void init(RegisterIdx RegIdx, const FreqInfo &Freq,
587 const Rematerializer &Remater, GCNScheduleDAGMILive &DAG);
588
589 /// Rematerializes the candidate using the \p Remater.
590 void rematerialize(Rematerializer &Remater) const;
591
592 /// Determines whether this rematerialization may be beneficial in at least
593 /// one target region.
594 bool maybeBeneficial(const BitVector &TargetRegions,
595 ArrayRef<GCNRPTarget> RPTargets) const;
596
597 /// Rematerializes the candidate and returns the new MI. This removes the
598 /// rematerialized register from live-in/out lists in the \p DAG and updates
599 /// \p RPTargets in all affected regions. Regions in which RP savings are
600 /// not guaranteed are set in \p RecomputeRP.
601 MachineInstr *rematerialize(BitVector &RecomputeRP,
604
605 /// Updates the rematerialization's score w.r.t. the current \p RPTargets.
606 /// \p RegionFreq indicates the frequency of each region.
607 void update(const BitVector &TargetRegions, ArrayRef<GCNRPTarget> RPTargets,
608 const FreqInfo &Freq, bool ReduceSpill);
609
610 /// Returns whether the current score is null, indicating the
611 /// rematerialization is useless.
612 bool hasNullScore() const { return !RegionImpact; }
613
614 /// Compare score components of non-null scores pair-wise. Scores shouldn't
615 /// be null (as defined by \ref hasNullScore).
// Ordering is lexicographic over (MaxFreq, FreqDiff, RegionImpact): the first
// component that differs decides the result. Only when all three are equal
// does the register index handle matter, and that comparison is reversed, so
// the candidate with the larger handle orders first.
616 bool operator<(const ScoredRemat &O) const {
617 assert(!hasNullScore() && "this has null score");
618 assert(!O.hasNullScore() && "other has null score");
619 if (MaxFreq != O.MaxFreq)
620 return MaxFreq < O.MaxFreq;
621 if (FreqDiff != O.FreqDiff)
622 return FreqDiff < O.FreqDiff;
623 if (RegionImpact != O.RegionImpact)
624 return RegionImpact < O.RegionImpact;
625 // Break ties using register index handles. If the two registers are
626 // connected in some dependency DAG of rematerializable registers, this
627 // will tend to give a higher score to the register further from the
628 // dependency DAG's root. If the two registers are disconnected, this will
629 // give a higher score to the register with lower virtual register index.
630 // In general, within a region, this should prefer registers defined
631 // earlier that have longer live ranges in their defining region (since
632 // the registers we consider are always live-out in their defining
633 // region).
634 return RegIdx > O.RegIdx;
635 }
636
637#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
638 Printable print() const;
639#endif
640
641 private:
642 // The three members below are the scoring components, top to bottom from
643 // most important to least important when comparing candidates.
644
645 /// Frequency of impacted target region with highest known frequency. This
646 /// only matters when the stage is trying to reduce spilling, so it is
647 /// always 0 when it is not.
648 uint64_t MaxFreq;
649 /// Frequency difference between defining and using regions. Negative values
650 /// indicate we are rematerializing to higher frequency regions; positive
651 /// values indicate the contrary.
652 int64_t FreqDiff;
653 /// Expected number of target regions impacted by the rematerialization,
654 /// scaled by the size of the register being rematerialized.
655 unsigned RegionImpact;
656 };
657
658 /// Register pressure targets for all regions.
659 SmallVector<GCNRPTarget> RPTargets;
660 /// Regions which are above the stage's RP target.
661 BitVector TargetRegions;
662 /// The target occupancy the set is trying to achieve. Empty when the
663 /// objective is spilling reduction.
664 std::optional<unsigned> TargetOcc;
665 /// Achieved occupancy *only* through rematerializations (pre-rescheduling).
666 unsigned AchievedOcc;
667 /// After successful stage initialization, indicates which regions should be
668 /// rescheduled.
669 BitVector RescheduleRegions;
670
671 /// Underlying utilities to identify and perform rematerializations.
672 Rematerializer Remater;
673
674 struct RollbackSupport {
676 /// The register index handle in the rematerializer.
677 RegisterIdx RegIdx;
678 /// Regions in which the original register was live-in or live-out.
680
684 };
685
686 /// Rollback listener.
687 Rollbacker Listener;
688 /// Registers removed from live-maps along with bitvectors indicating the
689 /// regions in which they were live-ins and live-outs.
690 SmallVector<LiveMapUpdate> LiveMapUpdates;
691
692 /// Attaches the rollback listener to the rematerializer.
693 RollbackSupport(Rematerializer &Remater) { Remater.addListener(&Listener); }
694 };
695
696 /// Rollback support. Maintained through a unique pointer because it is
697 /// optional and needs to persist between stage initialization and
698 /// finalization.
699 std::unique_ptr<RollbackSupport> Rollback;
700
701 /// State of a region pre-re-scheduling but post-rematerializations that we
702 /// must keep to be able to revert re-scheduling effects.
703 struct RegionSchedRevert {
704 /// Region number.
705 unsigned RegionIdx;
706 /// Original instruction order (both debug and non-debug MIs).
707 std::vector<MachineInstr *> OrigMIOrder;
708 /// Maximum pressure recorded in the region.
709 GCNRegPressure MaxPressure;
710
/// Stores the three values as given. \p OrigMIOrder arrives as an ArrayRef
/// and is copied into the owned std::vector member, so the record does not
/// keep referring to the caller's storage.
711 RegionSchedRevert(unsigned RegionIdx, ArrayRef<MachineInstr *> OrigMIOrder,
712 const GCNRegPressure &MaxPressure)
713 : RegionIdx(RegionIdx), OrigMIOrder(OrigMIOrder),
714 MaxPressure(MaxPressure) {}
715 };
716 /// After re-scheduling, contains pre-re-scheduling data for all re-scheduled
717 /// regions.
718 SmallVector<RegionSchedRevert> RegionReverts;
719 /// Whether we should revert all re-scheduled regions.
720 bool RevertAllRegions = false;
721
722 /// Returns the occupancy the stage is trying to achieve.
723 unsigned getStageTargetOccupancy() const;
724
725 /// Determines the stage's objective (increasing occupancy or reducing
726 /// spilling, set in \ref TargetOcc). Defines \ref RPTargets in all regions to
727 /// achieve that objective and mark those that don't achieve it in \ref
728 /// TargetRegions. Returns whether there is any target region.
729 bool setObjective();
730
731 /// In all regions set in \p Regions, saves pressure \p RPSave and clear it as
732 /// a target if its RP target has been reached.
733 void updateRPTargets(const BitVector &Regions, const GCNRegPressure &RPSave);
734
735 /// Fully recomputes RP from the DAG in \p Regions. Among those regions, sets
736 /// again all \ref TargetRegions that were optimistically marked as satisfied
737 /// but are actually not, and returns whether there were any such regions.
738 bool updateAndVerifyRPTargets(const BitVector &Regions);
739
740 /// Removes register \p Reg from the live-ins of regions set in \p LiveIn and
741 /// the live-outs of regions set in \p LiveOut.
742 void removeFromLiveMaps(Register Reg, const BitVector &LiveIn,
743 const BitVector &LiveOut);
744
745 /// Adds register \p Reg with mask \p Mask to the live-ins of regions set in
746 /// \p LiveIn and the live-outs of regions set in \p LiveOut.
747 void addToLiveMaps(Register Reg, LaneBitmask Mask, const BitVector &LiveIn,
748 const BitVector &LiveOut);
749
750 /// If remat alone did not increase occupancy to the target one, rolls back all
751 /// rematerializations and resets live-ins/RP in all regions impacted by the
752 /// stage to their pre-stage values.
753 void finalizeGCNSchedStage() override;
754
755public:
756 bool initGCNSchedStage() override;
757
758 bool initGCNRegion() override;
759
760 void finalizeGCNRegion() override;
761
762 bool shouldRevertScheduling(unsigned WavesAfter) override;
763
765 : GCNSchedStage(StageID, DAG), TargetRegions(DAG.Regions.size()),
766 RescheduleRegions(DAG.Regions.size()),
767 Remater(MF, DAG.Regions, *DAG.LIS) {
768 const unsigned NumRegions = DAG.Regions.size();
769 RPTargets.reserve(NumRegions);
770 }
771};
772
780
789
791private:
792 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
793
794 bool HasIGLPInstrs = false;
795
796public:
797 void schedule() override;
798
799 void finalizeSchedule() override;
800
802 std::unique_ptr<MachineSchedStrategy> S,
803 bool RemoveKillFlags);
804};
805
806} // End namespace llvm
807
808#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
This file defines the DenseMap class.
This file defines the GCNRegPressure class, which tracks registry pressure by bookkeeping number of S...
IRTranslator LLVM IR MI
Register Reg
Promote Memory to Register
Definition Mem2Reg.cpp:110
static constexpr unsigned SM(unsigned Version)
MIR-level target-independent rematerialization helpers.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
bool shouldRevertScheduling(unsigned WavesAfter) override
ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
GCNMaxILPSchedStrategy(const MachineSchedContext *C)
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
Apply a set of heuristics to a new candidate.
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
GCNMaxMemoryClauseSchedStrategy tries best to clause memory instructions as much as possible.
GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C)
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C, bool IsLegacyScheduler=false)
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Orders nodes according to selected style.
GCNPostScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
DenseMap< unsigned, LaneBitmask > LiveRegSet
GCNSchedStrategy & S
GCNRegPressure PressureBefore
bool isRegionWithExcessRP() const
void modifyRegionSchedule(unsigned RegionIdx, ArrayRef< MachineInstr * > MIOrder)
Sets the schedule of region RegionIdx to MIOrder.
bool mayCauseSpilling(unsigned WavesAfter)
ScheduleMetrics getScheduleMetrics(const std::vector< SUnit > &InputSchedule)
GCNScheduleDAGMILive & DAG
const GCNSchedStageID StageID
std::vector< MachineInstr * > Unsched
GCNRegPressure PressureAfter
MachineFunction & MF
virtual void finalizeGCNRegion()
SIMachineFunctionInfo & MFI
unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle, DenseMap< unsigned, unsigned > &ReadyCycles, const TargetSchedModel &SM)
virtual ~GCNSchedStage()=default
virtual void finalizeGCNSchedStage()
virtual bool initGCNSchedStage()
virtual bool shouldRevertScheduling(unsigned WavesAfter)
std::vector< std::unique_ptr< ScheduleDAGMutation > > SavedMutations
GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
MachineBasicBlock * CurrentMBB
const GCNSubtarget & ST
This is a minimal scheduler strategy.
const unsigned HighRPSGPRBias
GCNDownwardRPTracker DownwardTracker
void getRegisterPressures(bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU, std::vector< unsigned > &Pressure, std::vector< unsigned > &MaxPressure, GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker, ScheduleDAGMI *DAG, const SIRegisterInfo *SRI)
GCNSchedStrategy(const MachineSchedContext *C)
SmallVector< GCNSchedStageID, 4 > SchedStages
std::vector< unsigned > MaxPressure
SUnit * pickNodeBidirectional(bool &IsTopNode, bool &PickedPending)
GCNSchedStageID getCurrentStage()
bool tryPendingCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Evaluates instructions in the pending queue using a subset of scheduling heuristics.
SmallVectorImpl< GCNSchedStageID >::iterator CurrentStage
void schedNode(SUnit *SU, bool IsTopNode) override
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
std::optional< bool > GCNTrackersOverride
GCNDownwardRPTracker * getDownwardTracker()
std::vector< unsigned > Pressure
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
GCNUpwardRPTracker UpwardTracker
void printCandidateDecision(const SchedCandidate &Current, const SchedCandidate &Preferred)
const unsigned HighRPVGPRBias
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Cand, bool &IsPending, bool IsBottomUp)
unsigned getStructuralStallCycles(SchedBoundary &Zone, SUnit *SU) const
Estimate how many cycles SU must wait due to structural hazards at the current boundary cycle.
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure, bool IsBottomUp)
void setTargetOccupancy(unsigned Occ)
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
GCNUpwardRPTracker * getUpwardTracker()
GCNSchedStageID getNextStage() const
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Orders nodes according to selected style.
GCNScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
ScheduleDAGMILive * DAG
GenericScheduler(const MachineSchedContext *C)
bool shouldRevertScheduling(unsigned WavesAfter) override
ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool shouldRevertScheduling(unsigned WavesAfter) override
MemoryClauseInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
bool shouldRevertScheduling(unsigned WavesAfter) override
OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
bool shouldRevertScheduling(unsigned WavesAfter) override
void finalizeGCNRegion() override
bool initGCNSchedStage() override
Simple wrapper around std::function<void(raw_ostream&)>.
Definition Printable.h:38
Track the current register pressure at some position in the instruction stream, and remember the high...
GCNRPTracker::LiveRegSet & getLiveRegsForRegionIdx(unsigned RegionIdx)
RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut)
MIR-level target-independent rematerializer.
unsigned RegisterIdx
Index type for rematerializable registers.
RewriteMFMAFormStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
Rematerializer listener with the ability to re-create deleted registers and rollback rematerializatio...
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Scheduling unit. This is a node in the scheduling DAG.
Each Scheduling boundary is associated with ready queues.
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
ScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
unsigned getBubbles() const
ScheduleMetrics(unsigned L, unsigned BC)
unsigned getLength() const
static const unsigned ScaleFactor
unsigned getMetric() const
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
typename SuperClass::iterator iterator
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Provide an instruction scheduling machine model to CodeGen passes.
UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
bool shouldRevertScheduling(unsigned WavesAfter) override
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:360
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
std::pair< MachineBasicBlock::iterator, MachineBasicBlock::iterator > RegionBoundaries
A region's boundaries i.e.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
BitVector LiveIn
Regions in which the original register was live-in or live-out.
LiveMapUpdate(RegisterIdx RegIdx, const BitVector &LiveIn, const BitVector &LiveOut)
RegisterIdx RegIdx
The register index handle in the rematerializer.
Execution frequency information required by scoring heuristics.
SmallVector< uint64_t > Regions
Per-region execution frequencies. 0 when unknown.
uint64_t MinFreq
Minimum and maximum observed frequencies.
FreqInfo(MachineFunction &MF, const GCNScheduleDAGMILive &DAG)