LLVM 23.0.0git
SystemZMachineScheduler.cpp
Go to the documentation of this file.
1//-- SystemZMachineScheduler.cpp - SystemZ Scheduler Interface -*- C++ -*---==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
11
12using namespace llvm;
13
14#define DEBUG_TYPE "machine-scheduler"
15
16/// Pre-RA scheduling ///
17
18static bool isRegDef(const MachineOperand &MO) {
19 return MO.isReg() && MO.isDef();
20}
21
22static bool isPhysRegDef(const MachineOperand &MO) {
23 return isRegDef(MO) && MO.getReg().isPhysical();
24}
25
void SystemZPreRASchedStrategy::initializeLatencyReduction() {
  // Enable latency reduction for a region that has a considerable amount of
  // data sequences that should be interleaved. These are SUs that only have
  // one data predecessor / successor edge(s) to their adjacent instruction(s)
  // in the input order. Disable if region has many SUs relative to the
  // overall height.
  unsigned DAGHeight = 0;
  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx)
    DAGHeight = std::max(DAGHeight, DAG->SUnits[Idx].getHeight());
  RegionPolicy.DisableLatencyHeuristic =
      DAG->SUnits.size() >= 3 * std::max(DAGHeight, 1u);
  if ((HasDataSequences = !RegionPolicy.DisableLatencyHeuristic)) {
    unsigned CurrSequence = 0, NumSeqNodes = 0;
    // Close the current sequence; only sequences of two or more nodes are
    // counted into NumSeqNodes.
    auto countSequence = [&CurrSequence, &NumSeqNodes]() {
      if (CurrSequence >= 2)
        NumSeqNodes += CurrSequence;
      CurrSequence = 0;
    };
    for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
      const SUnit *SU = &DAG->SUnits[Idx];
      bool InDataSequence = true;
      // One Data pred to MI just above, or no preds.
      unsigned NumPreds = 0;
      for (const SDep &Pred : SU->Preds)
        if (++NumPreds != 1 || Pred.getKind() != SDep::Data ||
            Pred.getSUnit()->NodeNum != Idx - 1)
          InDataSequence = false;
      // One Data succ or no succs (ignoring ExitSU).
      unsigned NumSuccs = 0;
      for (const SDep &Succ : SU->Succs)
        if (Succ.getSUnit() != &DAG->ExitSU &&
            (++NumSuccs != 1 || Succ.getKind() != SDep::Data))
          InDataSequence = false;
      // Another type of node or one that does not have a single data pred
      // ends any previous sequence.
      if (!InDataSequence || !NumPreds)
        countSequence();
      if (InDataSequence)
        CurrSequence++;
    }
    // Close any sequence still open at the end of the region.
    countSequence();
    // Keep latency reduction enabled only if a noticeable fraction of the
    // region (at least 4 nodes, or a quarter of all SUs) is in sequences.
    if (NumSeqNodes >= std::max(size_t(4), DAG->SUnits.size() / 4)) {
      LLVM_DEBUG(dbgs() << "Number of nodes in def-use sequences: "
                        << NumSeqNodes << ". ";);
    } else
      HasDataSequences = false;
  }
}
74
75bool SystemZPreRASchedStrategy::definesCmp0Src(const MachineInstr *MI,
76 bool CCDef) const {
77 if (Cmp0SrcReg != SystemZ::NoRegister && MI->getNumOperands() &&
78 (MI->getDesc().hasImplicitDefOfPhysReg(SystemZ::CC) || !CCDef)) {
79 const MachineOperand &MO0 = MI->getOperand(0);
80 assert(!isPhysRegDef(MO0) && "Did not expect physreg def!");
81 if (isRegDef(MO0) && MO0.getReg() == Cmp0SrcReg)
82 return true;
83 }
84 return false;
85}
86
87static int biasPhysRegExtra(const SUnit *SU) {
88 if (int Res = biasPhysReg(SU, /*isTop=*/false))
89 return Res;
90
91 // Also recognize Load Address. Most of these are with an FI operand.
92 const MachineInstr *MI = SU->getInstr();
93 return MI->getNumOperands() && !MI->isCopy() &&
94 isPhysRegDef(MI->getOperand(0));
95}
96
98 SchedCandidate &TryCand,
99 SchedBoundary *Zone) const {
100 assert(Zone && !Zone->isTop() && "Bottom-Up scheduling only.");
101
102 // Initialize the candidate if needed.
103 if (!Cand.isValid()) {
104 TryCand.Reason = FirstValid;
105 return true;
106 }
107
108 // Bias physreg defs and copies to their uses and definitions respectively.
109 int TryCandPRegBias = biasPhysRegExtra(TryCand.SU);
110 int CandPRegBias = biasPhysRegExtra(Cand.SU);
111 if (tryGreater(TryCandPRegBias, CandPRegBias, TryCand, Cand, PhysReg))
112 return TryCand.Reason != NoCand;
113 if (TryCandPRegBias && CandPRegBias) {
114 // Both biased same way.
115 tryGreater(TryCand.SU->NodeNum, Cand.SU->NodeNum, TryCand, Cand, NodeOrder);
116 return TryCand.Reason != NoCand;
117 }
118
119 // Don't extend the scheduled latency in regions with many nodes in data
120 // sequences, or for (single block loop) regions that are acyclically
121 // (within a single loop iteration) latency limited. IsAcyclicLatencyLimited
122 // is set only after initialization in registerRoots(), which is why it is
123 // checked here instead of earlier.
124 if (!RegionPolicy.DisableLatencyHeuristic &&
125 (HasDataSequences || Rem.IsAcyclicLatencyLimited))
126 if (const SUnit *HigherSU =
127 TryCand.SU->getHeight() > Cand.SU->getHeight() ? TryCand.SU
128 : TryCand.SU->getHeight() < Cand.SU->getHeight() ? Cand.SU
129 : nullptr)
130 if (HigherSU->getHeight() > Zone->getScheduledLatency() &&
131 HigherSU->getDepth() < computeRemLatency(*Zone)) {
132 // One or both SUs increase the scheduled latency.
133 tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), TryCand, Cand,
135 return TryCand.Reason != NoCand;
136 }
137
138 // Weak edges help copy coalescing.
139 if (tryLess(TryCand.SU->WeakSuccsLeft, Cand.SU->WeakSuccsLeft, TryCand, Cand,
140 Weak))
141 return TryCand.Reason != NoCand;
142
143 // Help compare with zero elimination.
144 if (tryGreater(definesCmp0Src(TryCand.SU->getInstr()),
145 definesCmp0Src(Cand.SU->getInstr()), TryCand, Cand, Weak))
146 return TryCand.Reason != NoCand;
147
148 // Fall through to original instruction order.
149 if (TryCand.SU->NodeNum > Cand.SU->NodeNum) {
150 TryCand.Reason = NodeOrder;
151 return true;
152 }
153
154 return false;
155}
156
159 unsigned NumRegionInstrs) {
160 // Avoid setting up the register pressure tracker for small regions to save
161 // compile time. Currently only used for computeCyclicCriticalPath() which
162 // is used for single block loops.
163 MachineBasicBlock *MBB = Begin->getParent();
164 RegionPolicy.ShouldTrackPressure =
165 MBB->isSuccessor(MBB) && NumRegionInstrs >= 8;
166
167 // These heuristics has so far seemed to work better without adding a
168 // top-down boundary.
169 RegionPolicy.OnlyBottomUp = true;
171 this->NumRegionInstrs = NumRegionInstrs;
172}
173
176
177 Cmp0SrcReg = SystemZ::NoRegister;
178
179 initializeLatencyReduction();
180 LLVM_DEBUG(dbgs() << "Latency scheduling " << (HasDataSequences ? "" : "not ")
181 << "enabled for data sequences.\n";);
182}
183
185 GenericScheduler::schedNode(SU, IsTopNode);
186
187 const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(DAG->TII);
188 MachineInstr *MI = SU->getInstr();
189 if (TII->isCompareZero(*MI))
190 Cmp0SrcReg = TII->getCompareSourceReg(*MI);
191 else if (MI->getDesc().hasImplicitDefOfPhysReg(SystemZ::CC) ||
192 definesCmp0Src(MI, /*CCDef=*/false))
193 Cmp0SrcReg = SystemZ::NoRegister;
194}
195
196/// Post-RA scheduling ///
197
198#ifndef NDEBUG
199// Print the set of SUs
200void SystemZPostRASchedStrategy::SUSet::
201dump(SystemZHazardRecognizer &HazardRec) const {
202 dbgs() << "{";
203 for (auto &SU : *this) {
204 HazardRec.dumpSU(SU, dbgs());
205 if (SU != *rbegin())
206 dbgs() << ", ";
207 }
208 dbgs() << "}\n";
209}
210#endif
211
212// Try to find a single predecessor that would be interesting for the
213// scheduler in the top-most region of MBB.
215 const MachineLoop *Loop) {
216 MachineBasicBlock *PredMBB = nullptr;
217 if (MBB->pred_size() == 1)
218 PredMBB = *MBB->pred_begin();
219
220 // The loop header has two predecessors, return the latch, but not for a
221 // single block loop.
222 if (MBB->pred_size() == 2 && Loop != nullptr && Loop->getHeader() == MBB) {
223 for (MachineBasicBlock *Pred : MBB->predecessors())
224 if (Loop->contains(Pred))
225 PredMBB = (Pred == MBB ? nullptr : Pred);
226 }
227
228 assert ((PredMBB == nullptr || !Loop || Loop->contains(PredMBB))
229 && "Loop MBB should not consider predecessor outside of loop.");
230
231 return PredMBB;
232}
233
234void SystemZPostRASchedStrategy::
235advanceTo(MachineBasicBlock::iterator NextBegin) {
236 MachineBasicBlock::iterator LastEmittedMI = HazardRec->getLastEmittedMI();
238 ((LastEmittedMI != nullptr && LastEmittedMI->getParent() == MBB) ?
239 std::next(LastEmittedMI) : MBB->begin());
240
241 for (; I != NextBegin; ++I) {
242 if (I->isPosition() || I->isDebugInstr())
243 continue;
244 HazardRec->emitInstruction(&*I);
245 }
246}
247
249 Available.clear(); // -misched-cutoff.
250 LLVM_DEBUG(HazardRec->dumpState(););
251}
252
254 assert ((SchedStates.find(NextMBB) == SchedStates.end()) &&
255 "Entering MBB twice?");
256 LLVM_DEBUG(dbgs() << "** Entering " << printMBBReference(*NextMBB));
257
258 MBB = NextMBB;
259
260 /// Create a HazardRec for MBB, save it in SchedStates and set HazardRec to
261 /// point to it.
262 HazardRec = SchedStates[MBB] = new SystemZHazardRecognizer(TII, &SchedModel);
263 LLVM_DEBUG(const MachineLoop *Loop = MLI->getLoopFor(MBB);
264 if (Loop && Loop->getHeader() == MBB) dbgs() << " (Loop header)";
265 dbgs() << ":\n";);
266
267 // Try to take over the state from a single predecessor, if it has been
268 // scheduled. If this is not possible, we are done.
269 MachineBasicBlock *SinglePredMBB =
270 getSingleSchedPred(MBB, MLI->getLoopFor(MBB));
271 if (SinglePredMBB == nullptr)
272 return;
273 auto It = SchedStates.find(SinglePredMBB);
274 if (It == SchedStates.end())
275 return;
276
277 LLVM_DEBUG(dbgs() << "** Continued scheduling from "
278 << printMBBReference(*SinglePredMBB) << "\n";);
279
280 HazardRec->copyState(It->second);
281 LLVM_DEBUG(HazardRec->dumpState(););
282
283 // Emit incoming terminator(s). Be optimistic and assume that branch
284 // prediction will generally do "the right thing".
285 for (MachineInstr &MI : SinglePredMBB->terminators()) {
286 LLVM_DEBUG(dbgs() << "** Emitting incoming branch: "; MI.dump(););
287 bool TakenBranch = (MI.isBranch() &&
288 (TII->getBranchInfo(MI).isIndirect() ||
289 TII->getBranchInfo(MI).getMBBTarget() == MBB));
290 HazardRec->emitInstruction(&MI, TakenBranch);
291 if (TakenBranch)
292 break;
293 }
294}
295
297 LLVM_DEBUG(dbgs() << "** Leaving " << printMBBReference(*MBB) << "\n";);
298
299 // Advance to first terminator. The successor block will handle terminators
300 // dependent on CFG layout (T/NT branch etc).
301 advanceTo(MBB->getFirstTerminator());
302}
303
306 : MLI(C->MLI),
307 TII(static_cast<const SystemZInstrInfo *>
308 (C->MF->getSubtarget().getInstrInfo())),
309 MBB(nullptr), HazardRec(nullptr) {
310 const TargetSubtargetInfo *ST = &C->MF->getSubtarget();
311 SchedModel.init(ST);
312}
313
315 // Delete hazard recognizers kept around for each MBB.
316 for (auto I : SchedStates) {
317 SystemZHazardRecognizer *hazrec = I.second;
318 delete hazrec;
319 }
320}
321
324 unsigned NumRegionInstrs) {
325 // Don't emit the terminators.
326 if (Begin->isTerminator())
327 return;
328
329 // Emit any instructions before start of region.
330 advanceTo(Begin);
331}
332
333// Pick the next node to schedule.
335 // Only scheduling top-down.
336 IsTopNode = true;
337
338 if (Available.empty())
339 return nullptr;
340
341 // If only one choice, return it.
342 if (Available.size() == 1) {
343 LLVM_DEBUG(dbgs() << "** Only one: ";
344 HazardRec->dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";);
345 return *Available.begin();
346 }
347
348 // All nodes that are possible to schedule are stored in the Available set.
349 LLVM_DEBUG(dbgs() << "** Available: "; Available.dump(*HazardRec););
350
351 Candidate Best;
352 for (auto *SU : Available) {
353
354 // SU is the next candidate to be compared against current Best.
355 Candidate c(SU, *HazardRec);
356
357 // Remeber which SU is the best candidate.
358 if (Best.SU == nullptr || c < Best) {
359 Best = c;
360 LLVM_DEBUG(dbgs() << "** Best so far: ";);
361 } else
362 LLVM_DEBUG(dbgs() << "** Tried : ";);
363 LLVM_DEBUG(HazardRec->dumpSU(c.SU, dbgs()); c.dumpCosts();
364 dbgs() << " Height:" << c.SU->getHeight(); dbgs() << "\n";);
365
366 // Once we know we have seen all SUs that affect grouping or use unbuffered
367 // resources, we can stop iterating if Best looks good.
368 if (!SU->isScheduleHigh && Best.noCost())
369 break;
370 }
371
372 assert (Best.SU != nullptr);
373 return Best.SU;
374}
375
/// Build a Candidate for SU by querying the hazard recognizer for its
/// decoder-grouping and processor-resource costs.
SystemZPostRASchedStrategy::Candidate::
Candidate(SUnit *SU_, SystemZHazardRecognizer &HazardRec) : Candidate() {
  SU = SU_;

  // Check the grouping cost. For a node that must begin / end a
  // group, it is positive if it would do so prematurely, or negative
  // if it would fit naturally into the schedule.
  GroupingCost = HazardRec.groupingCost(SU);

  // Check the resources cost for this SU.
  ResourcesCost = HazardRec.resourcesCost(SU);
}
388
389bool SystemZPostRASchedStrategy::Candidate::
390operator<(const Candidate &other) {
391
392 // Check decoder grouping.
393 if (GroupingCost < other.GroupingCost)
394 return true;
395 if (GroupingCost > other.GroupingCost)
396 return false;
397
398 // Compare the use of resources.
399 if (ResourcesCost < other.ResourcesCost)
400 return true;
401 if (ResourcesCost > other.ResourcesCost)
402 return false;
403
404 // Higher SU is otherwise generally better.
405 if (SU->getHeight() > other.SU->getHeight())
406 return true;
407 if (SU->getHeight() < other.SU->getHeight())
408 return false;
409
410 // If all same, fall back to original order.
411 if (SU->NodeNum < other.SU->NodeNum)
412 return true;
413
414 return false;
415}
416
418 LLVM_DEBUG(dbgs() << "** Scheduling SU(" << SU->NodeNum << ") ";
419 if (Available.size() == 1) dbgs() << "(only one) ";
420 Candidate c(SU, *HazardRec); c.dumpCosts(); dbgs() << "\n";);
421
422 // Remove SU from Available set and update HazardRec.
423 Available.erase(SU);
424 HazardRec->EmitInstruction(SU);
425}
426
428 // Set isScheduleHigh flag on all SUs that we want to consider first in
429 // pickNode().
430 const MCSchedClassDesc *SC = HazardRec->getSchedClass(SU);
431 bool AffectsGrouping = (SC->isValid() && (SC->BeginGroup || SC->EndGroup));
432 SU->isScheduleHigh = (AffectsGrouping || SU->isUnbuffered);
433
434 // Put all released SUs in the Available set.
435 Available.insert(SU);
436}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
MachineBasicBlock & MBB
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
if(PassOpts->AAPipeline)
#define LLVM_DEBUG(...)
Definition Debug.h:114
static int biasPhysRegExtra(const SUnit *SU)
static MachineBasicBlock * getSingleSchedPred(MachineBasicBlock *MBB, const MachineLoop *Loop)
static bool isRegDef(const MachineOperand &MO)
Pre-RA scheduling ///.
static bool isPhysRegDef(const MachineOperand &MO)
MachineSchedPolicy RegionPolicy
ScheduleDAGMILive * DAG
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getHeader() const
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
MachineInstrBundleIterator< MachineInstr > iterator
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
SUnit * getSUnit() const
Kind getKind() const
Returns an enum value representing the kind of the dependence.
@ Data
Regular data dependence (aka true-dependence).
Definition ScheduleDAG.h:55
Scheduling unit. This is a node in the scheduling DAG.
unsigned NodeNum
Entry # of node in the node vector.
bool isUnbuffered
Uses an unbuffered resource.
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
bool isScheduleHigh
True if preferable to schedule high.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
unsigned WeakSuccsLeft
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Each Scheduling boundary is associated with ready queues.
unsigned getScheduledLatency() const
Get the number of latency cycles "covered" by the scheduled instructions.
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
SystemZHazardRecognizer maintains the state for one MBB during scheduling.
int groupingCost(SUnit *SU) const
Return the cost of decoder grouping for SU.
void dumpSU(SUnit *SU, raw_ostream &OS) const
int resourcesCost(SUnit *SU)
Return the cost of SU in regards to processor resources usage.
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
void leaveMBB() override
Tell the strategy that current MBB is done.
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Called for a region before scheduling.
void schedNode(SUnit *SU, bool IsTopNode) override
ScheduleDAGMI has scheduled an instruction - tell HazardRec about it.
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void enterMBB(MachineBasicBlock *NextMBB) override
Tell the strategy that MBB is about to be processed.
SystemZPostRASchedStrategy(const MachineSchedContext *C)
void releaseTopNode(SUnit *SU) override
SU has had all predecessor dependencies resolved.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
Apply a set of heuristics to a new candidate.
TargetSubtargetInfo - Generic base class for all target subtargets.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
LLVM_ABI unsigned computeRemLatency(SchedBoundary &CurrZone)
Compute remaining latency.
LLVM_ABI bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
LLVM_ABI int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition MCSchedule.h:123
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...